// Documentation: https://github.com/hooke007/MPV_lazy/wiki/4_GLSL

// CuNNy 4x12 DS
// Copyright (c) 2024 funnyplanter

// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3.0 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program.  If not, see <https://www.gnu.org/licenses/>.
/* ------------------------------------------------------------------- */


//!DESC [CuNNy_4x12_DS] -in
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND LUMA
//!SAVE in
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Precision selection: request the explicit float16 arithmetic extension and,
// when its macro is defined, alias V4/M4/F to the 16-bit vector, matrix and
// scalar types. Otherwise the same names map to the default vec4/mat4/float.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0(x, y): red channel of the LUMA texel at pos + (x, y), scaled by LUMA_mul
// and converted to F. The coordinate is clamped to [0, sz] so taps outside the
// image reuse the nearest edge texel. The macro expands `pos` and `sz` from
// the caller's scope, so it can only be used where both are defined.
// The "* ivec2(1, 1) + ivec2(0, 0)" factor leaves the coordinate unchanged.
#define l0(x, y) F((LUMA_mul * texelFetch(LUMA_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(1, 1) + ivec2(0, 0), 0)).r)
// Workgroup-shared 10x10 tile of input samples: the 8x8 block of pixels
// handled by the workgroup plus a one-texel border for the 3x3 taps.
shared F G[1][10][10];
// Entry point of the "-in" pass. For each source pixel it evaluates a 3x3
// weighted sum of the LUMA neighbourhood into three V4 accumulators (12
// values), adds a per-accumulator bias, clamps to [0, 1] and stores the three
// results in horizontally adjacent output texels.
void hook() {
	// Thread index inside the workgroup.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	// Source pixel handled by this thread (each workgroup covers 8x8 pixels).
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	// First of the three output texels written for this pixel.
	ivec2 opos = pos * ivec2(3, 1);
	// Largest valid LUMA coordinate; used by l0() for edge clamping.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Cooperative fill of the 10x10 tile. The offsets y and x take the values
	// 0 and 8, so a thread writes up to four entries; indices >= 10 are
	// skipped. G[0][ay][ax] receives the texel at (workgroup origin) +
	// (ax - 1, ay - 1), i.e. the tile is shifted by one texel up and left.
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
		}
	}
	// All tile writes must be complete before any thread reads its window.
	barrier();
	// s0_<row>_<col>: the 3x3 window around this pixel; s0_1_1 is the centre.
	F s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2;
	// r0..r2: output accumulators, four values each.
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2];
	// Weighted accumulation: for each of the nine taps, a constant V4 of
	// weights per accumulator is scaled by the scalar sample and added.
	r0 += V4(7.213e-02, 1.733e-02, -3.150e-02, 7.941e-02) * s0_0_0;
	r1 += V4(-8.719e-02, -5.164e-02, -9.595e-03, -7.228e-01) * s0_0_0;
	r2 += V4(-5.824e-02, 8.887e-01, -1.939e-01, 5.263e-01) * s0_0_0;
	r0 += V4(-1.067e-01, -7.984e-02, 7.952e-03, -5.510e-02) * s0_0_1;
	r1 += V4(-2.899e-01, -2.258e-01, -2.293e-02, -1.225e-01) * s0_0_1;
	r2 += V4(5.098e-01, 4.531e-02, -2.709e-01, 2.784e-01) * s0_0_1;
	r0 += V4(4.817e-03, 3.208e-02, 3.013e-02, -5.709e-03) * s0_0_2;
	r1 += V4(2.968e-02, -3.136e-03, 3.592e-03, -7.651e-03) * s0_0_2;
	r2 += V4(3.897e-02, 2.448e-02, -6.043e-02, 3.701e-02) * s0_0_2;
	r0 += V4(9.402e-01, -3.262e-02, 8.933e-02, 9.899e-01) * s0_1_0;
	r1 += V4(-8.804e-02, -7.544e-02, -2.538e-02, -3.894e-02) * s0_1_0;
	r2 += V4(-3.278e-02, -9.047e-01, -1.428e-01, -1.920e-02) * s0_1_0;
	r0 += V4(-8.417e-01, 3.517e-02, -5.337e-01, -9.473e-01) * s0_1_1;
	r1 += V4(-5.215e-01, -6.740e-01, 4.098e-01, 8.845e-01) * s0_1_1;
	r2 += V4(-4.230e-01, -1.704e-02, 7.559e-01, -7.090e-01) * s0_1_1;
	r0 += V4(-4.984e-02, -9.923e-02, 4.385e-01, -6.385e-02) * s0_1_2;
	r1 += V4(-1.509e-01, -1.208e-01, -1.174e-01, 5.624e-03) * s0_1_2;
	r2 += V4(-2.278e-02, -2.569e-02, -6.774e-03, -1.139e-01) * s0_1_2;
	r0 += V4(7.169e-02, 1.340e+00, -1.159e-01, 8.696e-02) * s0_2_0;
	r1 += V4(1.578e-01, 1.618e-01, 1.798e-02, 3.642e-03) * s0_2_0;
	r2 += V4(1.420e-02, 8.781e-03, -2.179e-02, -3.206e-02) * s0_2_0;
	r0 += V4(-1.150e-01, -7.690e-02, -4.444e-01, -1.325e-01) * s0_2_1;
	r1 += V4(8.634e-01, 8.553e-01, -1.158e-01, 1.544e-03) * s0_2_1;
	r2 += V4(-5.076e-03, -2.967e-02, 2.064e-02, -4.888e-02) * s0_2_1;
	r0 += V4(2.359e-02, 5.222e-02, 5.680e-01, 5.186e-02) * s0_2_2;
	r1 += V4(8.462e-02, 1.323e-01, 8.534e-02, -8.340e-04) * s0_2_2;
	r2 += V4(-1.776e-02, 7.646e-03, -1.835e-02, 8.228e-02) * s0_2_2;
	// Per accumulator: add the bias, clamp to [0, 1], then store at
	// opos + (0..2, 0). out_image is not declared in this file; presumably it
	// is supplied by the host for compute passes (verify against mpv docs).
	r0 += V4(1.752e-02, -1.156e+00, -2.883e-03, -2.400e-02);
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(1.910e-02, -1.065e-02, 1.682e-02, 1.024e-02);
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(3.166e-02, -4.865e-03, 1.062e-02, 1.609e-02);
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_4x12_DS] -conv1
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND in
//!BIND LUMA
//!SAVE conv1
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Precision selection: request the explicit float16 arithmetic extension and,
// when its macro is defined, alias V4/M4/F to the 16-bit vector, matrix and
// scalar types. Otherwise the same names map to the default vec4/mat4/float.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2(x, y): the three horizontally adjacent texels of the `in` texture
// that belong to source pixel pos + (x, y), at columns 3*px + 0, + 1 and + 2.
// The pixel coordinate is clamped to [0, sz] before the multiply by (3, 1),
// so the clamp works in source-pixel units. Each macro returns all four
// components, scaled by in_mul, as a V4. `pos` and `sz` are taken from the
// caller's scope.
#define l0(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Workgroup-shared tile: three planes (one per input texel of a pixel), each
// 10x10 = the workgroup's 8x8 pixels plus a one-pixel border for 3x3 taps.
shared V4 G[3][10][10];
// Entry point of the "-conv1" pass. For each source pixel it reads a 3x3
// window from each of the three planes of `in`, multiplies every V4 sample by
// a constant 4x4 matrix per output accumulator, sums the 27 products into
// r0..r2, adds a bias, clamps to [0, 1] and stores the three results in
// horizontally adjacent output texels.
void hook() {
	// Thread index inside the workgroup.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	// Source pixel handled by this thread (each workgroup covers 8x8 pixels).
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	// First of the three output texels written for this pixel.
	ivec2 opos = pos * ivec2(3, 1);
	// Largest valid pixel coordinate, taken from LUMA_size because the l*()
	// macros clamp in source-pixel units before scaling x by 3.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Cooperative fill of the three 10x10 planes. The offsets y and x take
	// the values 0 and 8, so a thread writes up to four entries per plane;
	// indices >= 10 are skipped. Entry [ay][ax] corresponds to the pixel at
	// (workgroup origin) + (ax - 1, ay - 1).
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	// All tile writes must be complete before any thread reads its window.
	barrier();
	// s<plane>_<row>_<col>: 3x3 windows around this pixel. Only two sets of
	// names exist; the s0_* names are reloaded from plane 2 further down,
	// after their plane-0 values have been consumed.
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	// r0..r2: output accumulators, four values each.
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// Plane 0 taps. Each M4 is built from 16 scalars (GLSL matrix
	// constructors consume them in column-major order) and is multiplied by
	// the V4 sample as matrix * column vector.
	r0 += M4(4.525e-01, 4.369e-02, -6.523e-02, 5.528e-02, -7.225e-02, -5.306e-02, -2.691e-01, 1.053e-02, -5.785e-02, 5.533e-02, 1.988e-01, -1.666e-02, -4.036e-01, -7.819e-03, -1.123e-01, -1.479e-01) * s0_0_0;
	r1 += M4(-8.111e-01, 1.592e-01, -2.338e-01, -5.243e-01, -2.836e-01, -9.209e-02, 7.551e-01, 1.045e-01, -1.694e-01, -4.740e-01, -5.926e-01, -4.895e-02, 2.247e-01, -1.384e-01, 4.077e-01, 5.918e-01) * s0_0_0;
	r2 += M4(3.389e-01, -7.612e-02, 1.234e-01, -1.098e-01, -8.708e-02, 2.750e-02, -3.664e-02, -7.139e-03, 5.497e-01, 5.756e-02, 3.994e-01, 5.429e-02, -2.562e-01, 1.270e-01, -1.342e-01, 1.061e-01) * s0_0_0;
	r0 += M4(1.431e-01, 2.058e-02, -1.654e-01, -1.684e-01, -6.804e-02, 9.344e-02, -4.696e-02, -2.219e-01, 4.249e-01, 9.565e-02, -1.236e-01, 1.329e-01, -2.196e-01, -1.396e-02, 6.589e-02, 2.882e-01) * s0_0_1;
	r1 += M4(1.534e-01, -5.074e-03, 1.934e-01, 3.656e-01, 2.120e-01, -4.807e-01, -3.932e-01, -1.000e+00, -1.576e-01, 1.960e-01, -4.327e-02, 1.702e-01, 4.753e-01, 1.728e-01, -5.948e-01, -3.576e-01) * s0_0_1;
	r2 += M4(5.532e-02, -5.140e-02, -1.808e-01, -2.101e-01, 5.133e-01, 3.882e-03, -6.728e-02, -1.682e-01, -6.789e-01, 8.155e-02, -1.119e-01, 3.587e-02, -1.000e+00, 6.215e-02, 2.705e-01, 2.006e-01) * s0_0_1;
	r0 += M4(-4.367e-01, -5.757e-03, 2.458e-02, -4.658e-01, 1.000e+00, -1.028e-01, -8.058e-02, -1.000e+00, -6.382e-01, -6.556e-03, -8.559e-03, -2.029e-02, -1.435e-02, 5.274e-03, 9.602e-02, 8.974e-02) * s0_0_2;
	r1 += M4(-2.636e-01, -1.083e-01, -6.573e-01, 2.088e-01, 8.646e-01, -7.493e-01, -1.000e+00, -1.000e+00, 2.259e-01, 7.785e-02, -5.288e-02, 2.286e-01, 6.154e-01, 7.697e-02, 3.928e-01, -1.233e-01) * s0_0_2;
	r2 += M4(-5.373e-01, 4.166e-02, 2.782e-01, 1.120e-01, 8.101e-01, 1.166e-01, 5.036e-02, 2.096e-01, 2.877e-02, 1.270e-02, 5.099e-02, 1.044e-01, 1.377e-01, -1.610e-03, -1.902e-01, 9.325e-02) * s0_0_2;
	r0 += M4(-2.744e-01, 4.637e-02, -2.653e-01, -3.103e-01, -6.331e-03, -9.510e-02, 2.049e-01, -3.123e-01, 1.800e-01, -9.515e-02, -1.374e-02, -4.483e-01, 3.636e-01, -1.314e-02, 1.845e-01, 2.981e-01) * s0_1_0;
	r1 += M4(1.487e-01, 8.205e-02, 1.000e+00, 6.782e-01, 2.460e-01, 1.763e-01, 3.134e-02, -2.723e-01, -6.436e-01, -2.380e-01, -7.597e-01, -1.000e+00, -3.715e-01, 2.359e-02, -1.000e+00, -4.762e-01) * s0_1_0;
	r2 += M4(6.012e-01, 1.167e-01, -2.959e-01, 8.208e-03, -5.854e-01, -3.772e-02, -4.350e-02, -4.684e-02, 1.052e-01, -1.164e-01, 5.567e-01, 7.235e-02, -6.535e-01, 2.592e-02, 2.249e-01, 2.652e-02) * s0_1_0;
	r0 += M4(-6.661e-01, 1.867e-01, -9.169e-01, -1.000e+00, 1.395e-02, -5.378e-02, 3.282e-01, 5.760e-01, -6.723e-01, -1.490e-01, -3.908e-01, 6.279e-01, 9.914e-01, 2.715e-01, 9.888e-01, 3.311e-01) * s0_1_1;
	r1 += M4(8.451e-01, -4.316e-01, 1.000e+00, 5.149e-01, -3.512e-01, -2.040e-01, 3.328e-01, 1.000e+00, -3.696e-01, 4.804e-01, 7.442e-02, -1.000e+00, -7.604e-01, 3.329e-01, -7.433e-01, -1.000e+00) * s0_1_1;
	r2 += M4(5.575e-01, 4.406e-01, -8.652e-01, 2.889e-01, -1.195e-01, 4.370e-02, -2.146e-02, -4.446e-02, -1.612e-01, 6.716e-02, -3.564e-01, 6.545e-02, -3.892e-01, -1.826e-03, 1.000e+00, -2.397e-01) * s0_1_1;
	r0 += M4(-1.000e+00, 4.732e-02, 3.675e-01, -3.843e-01, -6.845e-01, 6.003e-02, 2.811e-01, 2.716e-02, -9.515e-02, 2.456e-02, -1.459e-01, 1.312e-01, 1.000e+00, 8.423e-02, -2.054e-01, -1.000e+00) * s0_1_2;
	r1 += M4(6.339e-01, -2.739e-01, 1.000e+00, -1.000e+00, 3.092e-01, 1.056e-01, 5.497e-02, -1.000e+00, -3.033e-01, 8.174e-02, -1.124e-01, -3.212e-01, -5.517e-01, -1.000e+00, -8.760e-01, 4.951e-01) * s0_1_2;
	r2 += M4(5.842e-01, -9.372e-02, -5.607e-01, 1.000e+00, 5.893e-01, -1.165e-01, 3.343e-03, 1.706e-01, -3.504e-02, 2.593e-02, 1.542e-02, 1.753e-01, -4.202e-01, 4.206e-02, 4.531e-01, -3.814e-01) * s0_1_2;
	r0 += M4(1.343e-01, 1.984e-01, 1.479e-01, -8.596e-02, 1.418e-01, -7.675e-02, -7.059e-02, -1.981e-01, 9.358e-02, 3.440e-02, -1.652e-01, 1.128e-01, -1.122e-01, -9.606e-02, -7.816e-02, -2.480e-02) * s0_2_0;
	r1 += M4(7.877e-03, -2.385e-02, -2.524e-01, -3.159e-01, -2.390e-01, -1.278e-01, -2.076e-01, 2.221e-01, 3.196e-02, -9.006e-02, -2.155e-01, 4.616e-02, -1.000e+00, 4.455e-02, 2.625e-01, 4.615e-01) * s0_2_0;
	r2 += M4(-4.516e-01, -7.138e-03, 1.255e-01, -1.091e-01, 3.856e-03, -1.155e-01, 4.870e-02, -8.869e-02, -4.647e-02, 5.666e-02, 3.663e-02, -3.130e-02, 5.220e-01, 9.644e-02, -4.199e-02, 9.399e-02) * s0_2_0;
	r0 += M4(5.697e-01, 1.764e-01, -2.049e-02, 4.596e-02, -9.893e-02, -4.854e-02, -4.467e-01, -3.532e-02, 2.041e-01, 5.676e-02, -2.128e-01, -1.989e-01, -6.583e-01, 7.421e-02, -1.048e-01, 2.705e-01) * s0_2_1;
	r1 += M4(-7.751e-02, 1.898e-01, 4.855e-01, -9.057e-03, 1.838e-01, -1.485e-01, 9.779e-02, -1.084e-01, 3.427e-02, -4.955e-02, -5.681e-02, 3.600e-01, -2.606e-01, -5.009e-02, -1.665e-01, 1.104e-01) * s0_2_1;
	r2 += M4(-5.938e-01, -4.557e-01, 1.034e-02, -2.318e-01, 2.987e-01, 1.067e-01, -1.109e-01, -5.216e-02, -2.173e-01, 2.886e-02, -2.251e-02, 2.618e-02, 4.164e-01, 3.869e-01, -4.961e-02, 1.592e-01) * s0_2_1;
	r0 += M4(1.440e-01, -1.428e-01, -1.000e+00, -4.384e-01, 3.863e-02, 1.087e-01, 7.923e-03, 5.464e-01, 5.624e-02, 4.739e-02, -1.083e-02, 5.607e-02, -4.083e-03, 8.673e-02, 8.339e-01, 6.074e-01) * s0_2_2;
	r1 += M4(-7.847e-01, -3.370e-01, -1.473e-01, -7.342e-02, -6.045e-01, -7.932e-02, 9.923e-02, -2.977e-01, 2.445e-01, -2.435e-02, 7.008e-03, 1.177e-01, 5.213e-01, 2.923e-01, 2.231e-01, 2.458e-01) * s0_2_2;
	r2 += M4(-6.543e-01, 4.190e-01, 2.669e-01, 1.104e-01, 2.518e-02, 3.708e-02, 1.055e-02, 3.851e-02, 7.396e-02, 4.363e-02, 3.821e-02, -3.291e-02, 5.012e-01, -1.762e-01, -1.422e-01, -1.210e-01) * s0_2_2;
	// Plane 1 taps.
	r0 += M4(1.533e-01, -4.859e-02, -2.830e-01, -1.386e-02, -1.262e-01, 2.126e-02, 2.998e-01, 2.636e-01, -2.590e-01, 1.224e-01, 2.875e-02, -4.389e-01, -2.021e-01, -4.643e-03, -1.184e-01, 4.364e-03) * s1_0_0;
	r1 += M4(3.484e-01, -3.076e-01, 6.033e-01, 8.528e-02, -2.466e-01, 4.162e-01, -6.028e-01, -1.498e-01, -1.231e-01, -1.000e+00, -3.225e-01, -1.617e-01, -7.139e-03, 6.152e-02, 8.817e-02, 2.538e-01) * s1_0_0;
	r2 += M4(-2.106e-01, 1.795e-01, -7.666e-02, 1.622e-02, -3.568e-02, -1.431e-01, -7.442e-02, -8.288e-02, -1.424e-01, -7.044e-02, -1.800e-02, -1.066e-01, 1.200e-01, 4.770e-02, 6.736e-03, 2.381e-02) * s1_0_0;
	r0 += M4(-7.247e-01, 1.520e-01, 4.632e-01, 5.752e-01, 9.828e-01, -1.290e-01, -3.947e-01, -1.000e+00, 2.328e-01, 1.733e-01, -3.675e-02, 1.133e-01, -2.359e-01, 8.380e-03, -5.376e-02, -2.091e-01) * s1_0_1;
	r1 += M4(4.464e-01, 1.000e+00, 2.700e-01, -1.000e+00, 2.724e-01, -1.000e+00, -1.587e-01, -3.486e-01, 1.451e-01, -9.439e-01, -4.979e-01, -3.680e-01, 1.912e-01, -1.563e-01, -4.947e-02, 4.527e-01) * s1_0_1;
	r2 += M4(8.880e-02, -2.151e-01, 4.222e-03, 7.448e-02, -1.050e-01, 2.341e-01, 1.273e-01, -8.180e-02, 7.233e-01, 1.071e-01, 3.116e-01, -6.544e-01, -1.102e-01, 9.964e-02, -3.997e-02, 6.368e-02) * s1_0_1;
	r0 += M4(-8.778e-01, -1.693e-01, -8.996e-02, -1.059e-01, 1.000e+00, 1.395e-01, 6.854e-02, 7.641e-02, 1.000e+00, -2.703e-02, 2.204e-01, 3.631e-01, -2.478e-01, 3.567e-02, 1.091e-01, -1.879e-01) * s1_0_2;
	r1 += M4(-9.155e-01, -2.127e-01, -2.212e-01, 1.000e+00, 7.398e-01, 1.093e-01, 2.011e-01, -7.217e-01, 2.053e-02, -2.073e-01, -2.499e-01, -2.569e-01, 9.954e-02, -9.501e-02, -7.496e-02, -1.281e-02) * s1_0_2;
	r2 += M4(-2.540e-02, 1.368e-01, -6.341e-02, 3.096e-01, 7.778e-02, -1.333e-01, 1.826e-02, -3.359e-01, -3.413e-02, -1.082e-01, 1.376e-01, 8.173e-02, 7.861e-02, -1.957e-03, 3.883e-02, -1.422e-01) * s1_0_2;
	r0 += M4(3.902e-01, 8.912e-02, 4.804e-02, 5.257e-01, -2.356e-01, -3.418e-02, -7.840e-03, -6.119e-01, -7.029e-01, 1.527e-01, -1.567e-01, -4.426e-02, 8.844e-02, -8.449e-02, 5.009e-02, -1.987e-01) * s1_1_0;
	r1 += M4(2.404e-01, 4.700e-01, 4.639e-01, 3.042e-01, -2.866e-01, -4.190e-01, -7.951e-01, -1.000e+00, 1.122e-01, -9.982e-02, 6.917e-01, 1.654e-01, -3.838e-01, 5.634e-03, 2.782e-03, 2.321e-01) * s1_1_0;
	r2 += M4(-3.191e-02, 1.518e-01, -4.757e-01, 4.161e-03, 3.955e-02, -7.053e-02, 4.024e-01, 6.964e-02, 9.902e-03, -5.656e-02, -9.953e-01, 3.379e-02, 2.250e-03, 1.932e-02, 6.684e-02, -5.112e-03) * s1_1_0;
	r0 += M4(-6.346e-01, 2.657e-01, 1.000e+00, 7.239e-02, 3.807e-01, 4.620e-02, -8.415e-01, 2.226e-01, -6.572e-01, -3.526e-01, -4.954e-01, 9.243e-02, -1.449e-01, 2.916e-02, 1.201e-01, -7.342e-01) * s1_1_1;
	r1 += M4(4.437e-01, -3.995e-01, 9.528e-02, 1.000e+00, -8.847e-01, 4.912e-01, -8.574e-02, -1.000e+00, 1.421e-01, 1.000e-01, -2.353e-01, 1.734e-01, 2.065e-01, -1.000e+00, -3.691e-01, -1.000e+00) * s1_1_1;
	r2 += M4(4.407e-01, -6.170e-01, 7.439e-02, 5.623e-02, -8.295e-01, 3.838e-01, -1.430e-01, -1.083e-01, -1.816e-01, 5.222e-01, 6.915e-01, 8.334e-01, 5.674e-01, -1.092e-01, 7.726e-02, -1.216e-01) * s1_1_1;
	r0 += M4(-8.042e-01, -7.132e-02, -1.900e-01, 4.839e-01, 4.594e-01, 2.116e-02, -1.964e-02, -9.570e-02, 5.394e-01, -6.506e-02, -3.379e-02, -5.606e-01, -1.672e-01, 2.287e-03, 4.022e-01, 1.038e-01) * s1_1_2;
	r1 += M4(-6.191e-01, 1.667e-01, -2.180e-01, -1.000e+00, 6.503e-01, -1.165e-01, 3.144e-01, 3.179e-02, -2.402e-01, 2.609e-01, 2.074e-01, 2.944e-01, 2.674e-01, -2.703e-01, 6.321e-02, -4.968e-01) * s1_1_2;
	r2 += M4(-2.901e-01, 1.518e-01, 1.293e-02, -3.191e-01, 3.525e-01, -3.374e-02, -1.178e-01, 2.738e-01, 3.260e-02, -1.357e-01, 1.102e-01, -3.592e-01, -7.125e-01, -6.421e-02, 1.714e-01, -2.064e-02) * s1_1_2;
	r0 += M4(4.180e-01, 1.185e-02, -1.237e-01, -3.524e-02, -3.426e-01, 2.266e-02, 1.242e-01, 1.670e-01, -1.089e-01, -1.056e-01, 2.207e-01, -1.845e-01, 2.069e-01, -5.712e-02, 7.948e-02, -1.275e-01) * s1_2_0;
	r1 += M4(3.918e-01, 4.584e-02, 4.327e-01, -7.431e-01, -1.000e+00, -3.118e-02, -5.324e-01, 7.820e-01, -3.698e-02, -1.104e-01, 9.170e-02, 2.162e-01, -1.557e-01, 1.968e-02, 1.566e-01, -1.079e-02) * s1_2_0;
	r2 += M4(1.604e-01, -2.293e-02, -1.255e-01, -1.284e-01, -1.582e-01, -2.937e-02, 1.251e-01, 1.509e-01, 1.267e-01, 5.070e-02, -1.528e-01, 8.792e-02, 5.952e-02, -5.140e-02, -3.496e-02, 2.838e-02) * s1_2_0;
	r0 += M4(-1.234e-01, 2.193e-01, 8.231e-01, -2.781e-01, 6.898e-02, -1.991e-01, -1.000e+00, 2.028e-01, 1.227e-01, 5.549e-02, 1.319e-01, 6.597e-02, -2.676e-01, 3.197e-01, 1.202e-01, 2.065e-01) * s1_2_1;
	r1 += M4(4.223e-01, -2.191e-01, -1.475e-01, 4.877e-01, -5.292e-01, 3.451e-01, 2.111e-01, -4.517e-01, 1.019e-01, 2.717e-01, 2.534e-01, -2.021e-01, 3.702e-01, -5.322e-02, 6.309e-01, 2.959e-01) * s1_2_1;
	r2 += M4(1.642e-01, -4.619e-01, 1.665e-01, 1.899e-01, -5.926e-02, 6.387e-01, -2.322e-01, -1.712e-01, -4.387e-01, -1.615e-01, 2.771e-02, 6.555e-02, -1.194e-01, 2.703e-02, -2.684e-01, -1.909e-02) * s1_2_1;
	r0 += M4(-9.162e-02, -1.054e-01, -1.114e-02, -3.380e-01, 1.155e-01, 1.449e-01, -2.226e-01, 3.424e-01, -2.252e-01, -7.604e-03, 1.577e-01, 2.938e-01, 4.756e-01, 3.603e-01, 5.250e-01, 6.370e-02) * s1_2_2;
	r1 += M4(-3.853e-01, 1.358e-01, -1.162e-01, -2.990e-01, 3.347e-01, -1.388e-01, 1.372e-01, 2.400e-01, -1.107e-01, 4.564e-02, 1.313e-01, 2.096e-01, 7.561e-03, -5.074e-01, -8.288e-02, 5.752e-01) * s1_2_2;
	r2 += M4(-2.543e-01, 9.214e-03, -2.293e-04, -1.740e-01, 2.502e-01, -4.081e-02, 6.850e-02, 1.111e-01, 6.010e-02, -1.628e-01, -1.364e-01, -1.678e-02, 2.672e-01, -2.144e-01, 4.999e-01, -1.240e-01) * s1_2_2;
	// Plane 2: reload the s0_* names from G[2], then accumulate its taps.
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	r0 += M4(-1.657e-01, -8.413e-02, 2.222e-01, 1.261e-01, -9.749e-02, 1.938e-02, -6.471e-02, -1.187e-01, 2.349e-01, 1.417e-02, 4.246e-01, 2.120e-01, 5.995e-02, -2.175e-02, 1.975e-01, 1.250e-01) * s0_0_0;
	r1 += M4(2.799e-01, 1.578e-01, -3.520e-01, -4.091e-01, 3.426e-02, -3.434e-02, -8.178e-02, 2.354e-01, -1.673e-01, 1.298e-02, -6.209e-01, -7.736e-01, 3.928e-01, 8.598e-02, 5.182e-02, -2.215e-01) * s0_0_0;
	r2 += M4(-2.912e-01, -9.264e-02, -1.644e-02, 7.647e-02, -1.161e-01, 4.063e-02, 1.053e-02, 2.010e-02, -1.022e-01, -2.006e-01, 1.546e-01, -1.126e-01, 1.369e-02, -3.540e-02, -7.765e-03, -5.724e-02) * s0_0_0;
	r0 += M4(5.481e-01, -3.068e-02, -2.133e-01, -2.588e-01, 2.642e-02, 4.576e-02, -3.763e-01, -2.821e-01, 8.797e-01, -5.587e-02, -2.571e-01, 2.818e-02, -5.521e-02, 1.941e-02, 2.467e-01, 2.844e-01) * s0_0_1;
	r1 += M4(-3.786e-01, -1.955e-01, 6.698e-01, 4.520e-02, -2.411e-01, -7.353e-02, 4.501e-01, 4.987e-01, -6.291e-01, 4.698e-01, 5.280e-01, 3.537e-01, -3.933e-01, -1.085e-01, -3.915e-01, -3.896e-01) * s0_0_1;
	r2 += M4(5.079e-01, 4.156e-02, -2.519e-01, 1.828e-01, 9.584e-02, 6.518e-02, -1.071e-01, 2.828e-02, 8.179e-02, -7.668e-02, -1.334e-01, 1.538e-01, -3.779e-01, -1.780e-02, 1.672e-01, -7.678e-02) * s0_0_1;
	r0 += M4(-3.902e-02, -1.394e-02, 4.935e-02, 5.405e-02, -6.152e-01, -2.858e-02, 7.085e-02, 9.150e-02, -3.255e-02, -1.363e-02, -6.342e-02, 3.645e-01, -3.275e-01, 6.583e-02, -8.567e-02, -7.259e-03) * s0_0_2;
	r1 += M4(3.596e-01, -1.692e-01, 1.358e-02, 1.598e-01, 3.984e-01, 4.775e-02, -2.996e-01, 2.260e-01, 1.480e-01, -8.677e-03, 7.814e-02, -1.422e-01, -8.254e-02, 2.860e-01, 2.774e-02, -8.560e-02) * s0_0_2;
	r2 += M4(1.832e-01, 1.209e-02, 4.321e-02, -2.107e-02, -1.000e+00, 2.664e-02, 1.347e-01, -1.527e-01, -9.529e-02, 2.009e-03, -1.441e-02, 7.949e-02, -9.047e-02, -7.935e-03, -2.137e-02, 2.163e-02) * s0_0_2;
	r0 += M4(6.722e-02, -1.985e-03, -7.458e-02, -1.387e-01, 2.287e-01, -1.464e-02, 2.255e-02, -9.317e-02, -1.582e-03, 2.977e-01, -2.834e-01, 3.917e-01, -2.469e-01, -1.093e-02, 2.212e-01, 1.275e-01) * s0_1_0;
	r1 += M4(7.174e-01, -9.672e-02, -7.820e-02, -2.269e-01, 9.172e-02, 4.689e-02, 1.137e-02, 1.613e-01, 4.968e-01, 1.348e-01, 2.742e-01, 1.158e-01, 1.762e-01, -1.519e-01, -3.971e-01, -3.791e-02) * s0_1_0;
	r2 += M4(-8.361e-01, 3.297e-03, -6.046e-02, 1.236e-01, -2.369e-01, 6.834e-03, -2.964e-02, 3.792e-03, -5.951e-02, 4.844e-02, -2.739e-01, 1.399e-02, 8.692e-02, -6.507e-02, -2.393e-02, 3.614e-02) * s0_1_0;
	r0 += M4(9.973e-01, -3.826e-03, -1.152e-01, 9.517e-01, 2.268e-01, -7.052e-02, -4.566e-02, -4.101e-01, -7.393e-01, -4.639e-01, -6.170e-01, 3.500e-01, -2.420e-02, -1.136e-01, -2.466e-01, 5.397e-01) * s0_1_1;
	r1 += M4(-1.000e+00, 3.447e-01, -5.169e-01, 1.530e-02, -9.637e-01, 3.975e-02, 5.098e-01, -2.545e-01, -5.406e-01, 3.225e-01, -4.535e-01, 5.458e-01, 1.328e-03, 1.000e+00, 1.311e-01, 1.000e+00) * s0_1_1;
	r2 += M4(-6.935e-02, -9.992e-02, 2.587e-02, -3.439e-01, 1.941e-01, 5.466e-02, 2.087e-01, 4.040e-03, -1.000e+00, 2.253e-01, 2.265e-01, -9.936e-02, 2.821e-01, 3.911e-02, -3.351e-01, 3.715e-02) * s0_1_1;
	r0 += M4(-4.077e-01, 4.931e-02, -2.534e-01, -2.222e-01, -2.759e-01, 2.487e-01, 3.336e-01, -4.181e-01, 3.784e-01, 1.755e-01, -7.672e-03, -1.532e-03, -7.565e-01, -1.066e-01, -7.258e-02, -1.003e-01) * s0_1_2;
	r1 += M4(4.802e-01, 2.454e-01, 1.322e-01, -3.954e-01, 3.190e-03, -3.070e-01, 4.088e-01, -1.000e+00, 8.693e-02, 2.179e-01, -3.873e-02, 3.196e-01, 7.362e-01, 1.704e-02, -1.541e-01, 4.068e-01) * s0_1_2;
	r2 += M4(-3.709e-01, 1.979e-02, 1.326e-01, -8.777e-02, -2.466e-01, -1.961e-02, 1.103e-01, 1.628e-01, 5.200e-01, -4.403e-02, 3.626e-02, 9.711e-03, 7.016e-01, 2.913e-02, -1.842e-01, 1.714e-01) * s0_1_2;
	r0 += M4(-5.976e-01, -1.094e-01, 2.220e-01, 1.775e-01, 2.848e-02, 6.640e-02, 8.792e-03, 2.828e-02, -2.436e-01, 1.155e-01, -1.571e-01, -6.757e-03, -1.182e-01, -1.948e-01, 6.052e-02, 5.089e-02) * s0_2_0;
	r1 += M4(1.382e-01, 1.107e-01, -1.000e+00, 2.253e-01, -1.003e-01, -2.162e-02, 2.278e-01, 2.980e-01, 4.545e-01, -3.240e-02, 8.779e-02, -1.775e-01, 2.311e-01, 5.257e-02, -1.067e-02, -5.724e-01) * s0_2_0;
	r2 += M4(2.040e-01, -1.676e-01, 2.082e-01, 1.145e-02, 1.178e-01, 3.500e-02, -5.153e-02, -2.826e-02, -3.488e-03, 2.555e-01, -5.689e-02, -5.668e-02, -4.044e-02, -1.951e-01, 1.019e-01, 6.668e-02) * s0_2_0;
	r0 += M4(1.772e-01, -3.571e-01, -1.000e+00, -3.249e-01, 1.329e-01, 1.860e-01, 7.041e-02, -5.455e-01, 2.680e-01, 1.967e-01, 7.283e-01, -8.092e-02, 2.517e-01, -2.377e-01, -6.791e-02, -5.724e-01) * s0_2_1;
	r1 += M4(-1.615e-01, -2.813e-01, -4.486e-01, -1.638e-01, -9.919e-02, -7.392e-02, 6.523e-01, 4.022e-01, 1.060e-01, 4.052e-01, -1.433e-01, -3.509e-01, -7.241e-01, -9.243e-02, -2.141e-01, -6.273e-01) * s0_2_1;
	r2 += M4(2.385e-01, 1.000e+00, 5.168e-02, 2.359e-03, 8.010e-02, 2.903e-01, -1.256e-01, -9.660e-02, 4.577e-01, -2.881e-01, 1.469e-01, 9.889e-02, -1.078e-01, 3.740e-01, 4.231e-02, 6.895e-02) * s0_2_1;
	r0 += M4(-3.748e-01, -1.959e-02, 2.688e-01, 4.707e-01, 7.464e-01, 1.933e-01, 7.869e-01, -9.682e-01, -6.337e-02, -2.855e-02, 1.389e-01, -3.239e-01, -2.026e-01, 3.589e-02, -5.515e-01, -5.577e-01) * s0_2_2;
	r1 += M4(3.610e-01, 3.792e-02, 3.543e-01, 1.739e-01, 2.925e-01, -1.000e+00, 2.745e-01, -1.000e+00, 3.442e-01, 2.246e-01, 3.969e-02, 2.209e-01, -1.477e-01, -9.986e-02, -4.072e-01, 9.494e-02) * s0_2_2;
	r2 += M4(2.319e-01, -3.005e-01, -5.837e-02, 5.701e-02, -1.055e-01, -1.334e-01, 3.004e-01, -5.961e-02, 7.986e-02, 1.912e-02, -3.689e-03, -3.094e-02, -1.023e-01, 3.790e-01, -4.284e-02, -3.135e-02) * s0_2_2;
	// Per accumulator: add the bias, clamp to [0, 1], then store at
	// opos + (0..2, 0). out_image is not declared in this file; presumably it
	// is supplied by the host for compute passes (verify against mpv docs).
	r0 += V4(4.805e-03, 2.806e-02, 1.261e-02, 1.173e-02);
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(-7.202e-02, 1.409e-02, -2.948e-02, 6.207e-03);
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(-5.126e-02, 4.930e-03, 4.749e-02, -1.679e-02);
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_4x12_DS] -conv2
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND conv1
//!BIND LUMA
//!SAVE conv2
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Precision selection: request the explicit float16 arithmetic extension and,
// when its macro is defined, alias V4/M4/F to the 16-bit vector, matrix and
// scalar types. Otherwise the same names map to the default vec4/mat4/float.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2(x, y): the three horizontally adjacent texels of the `conv1`
// texture that belong to source pixel pos + (x, y), at columns 3*px + 0, + 1
// and + 2. The pixel coordinate is clamped to [0, sz] before the multiply by
// (3, 1). Each macro returns all four components, scaled by conv1_mul, as a
// V4. `pos` and `sz` are taken from the caller's scope.
#define l0(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Workgroup-shared tile: three planes (one per input texel of a pixel), each
// 10x10 = the workgroup's 8x8 pixels plus a one-pixel border for 3x3 taps.
shared V4 G[3][10][10];
void hook() {
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(3, 1);
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	barrier();
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	r0 += M4(-9.537e-02, -6.222e-02, -4.112e-02, -1.501e-01, -9.816e-02, -1.604e-01, -4.681e-02, 7.585e-02, 1.734e-02, 8.863e-02, 7.282e-02, 1.355e-01, 1.047e-02, 3.018e-02, 5.062e-02, -3.936e-01) * s0_0_0;
	r1 += M4(-6.323e-02, 3.230e-01, 9.317e-02, 1.851e-01, 3.314e-02, 1.462e-01, 3.449e-01, 2.580e-02, 1.473e-01, 1.131e-03, -1.681e-01, -1.277e-01, 4.115e-02, 5.321e-03, 6.560e-03, 8.262e-02) * s0_0_0;
	r2 += M4(3.897e-02, 2.618e-02, -1.941e-01, 4.157e-03, 3.084e-02, -9.830e-02, -1.066e-01, -6.154e-03, 5.299e-02, 2.301e-02, 2.850e-01, 2.474e-02, 6.986e-03, -1.458e-01, -1.602e-01, -7.756e-02) * s0_0_0;
	r0 += M4(1.268e-01, 1.139e-02, 6.723e-02, 1.423e-01, -8.422e-02, 1.029e-01, 5.382e-01, -4.293e-02, -7.580e-02, -8.558e-02, 1.294e-01, -4.990e-01, 1.519e-01, -3.872e-02, -7.682e-02, 1.725e-02) * s0_0_1;
	r1 += M4(4.727e-03, 1.988e-01, -4.975e-02, 1.228e-01, 1.782e-01, 2.241e-01, -4.395e-01, 3.467e-01, 2.438e-01, -2.290e-01, 9.350e-03, 2.791e-01, -5.589e-02, 1.592e-01, 5.024e-02, -3.107e-02) * s0_0_1;
	r2 += M4(-3.333e-02, -1.923e-01, -2.901e-01, -1.890e-01, -2.171e-01, -1.000e+00, -3.132e-01, -6.426e-01, 9.232e-02, 2.753e-01, 3.498e-01, -6.478e-02, 2.997e-02, -4.111e-01, -8.091e-02, -6.078e-02) * s0_0_1;
	r0 += M4(5.891e-02, 1.869e-02, -6.680e-03, 1.204e-01, -5.842e-02, 2.222e-02, 7.469e-03, 3.706e-02, -6.955e-02, -4.906e-02, -1.252e-01, 6.828e-02, 3.663e-02, 3.113e-02, 4.005e-02, -2.509e-01) * s0_0_2;
	r1 += M4(2.093e-02, -4.325e-02, -6.975e-03, -2.359e-02, 7.723e-03, -3.686e-01, -7.542e-02, -8.618e-02, 1.039e-02, -1.609e-01, 5.168e-02, 3.469e-02, -1.641e-04, 1.231e-01, 1.880e-03, 2.698e-02) * s0_0_2;
	r2 += M4(5.820e-02, -1.598e-02, -2.284e-01, -6.433e-02, 1.773e-02, -2.536e-01, -6.818e-02, -2.114e-01, 3.869e-02, 3.662e-01, 7.152e-02, 1.394e-01, -1.376e-02, -1.681e-01, 2.465e-02, -5.742e-02) * s0_0_2;
	r0 += M4(-2.882e-01, -8.722e-02, -1.470e-01, 2.869e-02, 5.340e-02, -3.474e-01, -8.024e-03, -1.029e-01, -1.672e-01, -1.569e-01, 3.051e-02, -1.018e-02, 1.891e-01, -1.298e-01, -5.092e-02, -2.597e-01) * s0_1_0;
	r1 += M4(-9.506e-03, 4.515e-02, 2.075e-01, -4.453e-02, 1.597e-01, -1.840e-01, 1.163e-01, -2.076e-01, 4.858e-02, -3.015e-01, -1.023e-01, 1.744e-01, -1.289e-02, 1.178e-01, 2.786e-01, -8.803e-02) * s0_1_0;
	r2 += M4(7.512e-02, 1.222e-01, -2.506e-01, 9.833e-02, -2.673e-02, 1.672e-01, -1.419e-01, 6.457e-02, -2.585e-01, 1.578e-01, 2.331e-01, 3.566e-02, 1.500e-01, -6.105e-02, -2.161e-01, -3.696e-02) * s0_1_0;
	r0 += M4(5.060e-02, 2.235e-02, -1.490e-01, -5.913e-01, 1.618e-01, 3.120e-01, 4.816e-01, 4.149e-02, -3.621e-03, 6.595e-02, 2.510e-01, -1.000e+00, 1.076e-01, -1.196e-01, -1.331e-01, -1.532e-01) * s0_1_1;
	r1 += M4(-8.083e-02, -1.986e-02, 1.471e-01, -3.615e-02, -1.810e-01, 5.055e-01, -3.085e-01, 1.762e-01, -1.142e-01, 2.513e-02, 2.106e-01, 1.070e-01, -1.262e-01, -4.647e-01, -1.455e-01, -1.195e-01) * s0_1_1;
	r2 += M4(-4.931e-02, 1.045e-01, -3.558e-01, -9.933e-04, -6.402e-02, 2.258e-01, 4.659e-01, 1.562e-01, -2.380e-01, 9.937e-02, 2.128e-01, -5.723e-03, 1.067e-01, -3.348e-01, 3.146e-02, -2.706e-04) * s0_1_1;
	r0 += M4(4.469e-02, -5.949e-03, -1.009e-01, -3.353e-03, 4.853e-02, 6.285e-02, -1.263e-01, 2.408e-01, -9.150e-03, -2.516e-02, -9.384e-02, -4.431e-01, -2.454e-02, 5.200e-02, 1.015e-01, 3.136e-01) * s0_1_2;
	r1 += M4(-1.390e-02, 8.657e-02, 5.970e-02, -8.898e-02, 1.601e-01, 7.121e-03, 4.721e-02, 1.141e-01, -1.616e-01, 1.796e-01, -2.527e-02, 4.222e-02, -1.819e-02, -1.705e-01, 5.432e-02, 3.648e-02) * s0_1_2;
	r2 += M4(8.431e-04, 7.911e-02, -2.321e-01, 4.091e-03, -1.067e-01, 1.687e-01, -3.247e-01, -1.809e-02, -1.387e-02, 3.857e-02, 1.412e-01, -3.164e-02, -6.672e-02, -4.475e-02, 1.826e-01, 4.314e-02) * s0_1_2;
	r0 += M4(1.731e-01, -3.211e-02, 1.177e-01, 1.792e-01, 1.945e-02, 9.531e-02, 6.243e-03, 2.039e-02, -1.200e-01, -3.879e-02, -2.506e-02, 9.529e-02, 1.753e-01, -8.318e-02, 6.100e-02, -1.573e-01) * s0_2_0;
	r1 += M4(-2.999e-02, 1.993e-02, 1.580e-01, 1.530e-01, 5.240e-02, 4.363e-02, -9.969e-02, -8.465e-02, 5.100e-03, -1.301e-01, 2.143e-02, 6.141e-02, -3.104e-02, -9.639e-02, 4.568e-02, 3.796e-02) * s0_2_0;
	r2 += M4(1.217e-02, 2.105e-02, -1.919e-01, 1.174e-01, -3.579e-02, -5.288e-02, -1.401e-01, 2.123e-02, -3.442e-02, 5.260e-02, 4.948e-02, 3.911e-03, 5.899e-03, 4.237e-02, 3.375e-02, 3.114e-02) * s0_2_0;
	r0 += M4(1.631e-02, -1.402e-02, -4.792e-02, 7.559e-02, -6.179e-02, -5.455e-02, 1.074e-01, 2.067e-01, -2.057e-02, 5.458e-02, -1.117e-02, -2.292e-01, 1.491e-01, -1.001e-02, 5.835e-02, 1.657e-01) * s0_2_1;
	r1 += M4(-9.256e-02, -4.801e-02, 5.022e-02, 1.462e-01, -7.199e-02, 3.050e-02, 6.137e-02, 1.167e-01, 3.514e-02, -3.312e-01, 1.190e-02, 6.500e-03, 1.708e-02, -2.329e-03, -5.787e-02, -2.939e-02) * s0_2_1;
	r2 += M4(1.149e-01, 1.110e-01, -2.238e-01, 2.781e-02, -1.613e-01, 1.189e-01, -1.176e-02, 3.772e-02, 1.533e-01, -1.112e-01, -1.835e-02, -3.811e-02, -1.131e-01, 5.351e-02, 6.726e-02, 3.521e-02) * s0_2_1;
	r0 += M4(1.495e-03, -2.646e-03, -7.988e-02, 8.551e-02, 1.499e-01, 1.115e-01, 4.602e-02, 2.182e-01, -1.450e-01, -7.437e-02, -1.120e-02, -3.951e-02, 8.454e-02, 2.795e-02, -3.093e-02, 5.673e-02) * s0_2_2;
	r1 += M4(-1.412e-02, 3.428e-01, 7.726e-02, -1.123e-02, -1.321e-01, -4.036e-03, 7.818e-02, 5.795e-03, 3.326e-02, -1.019e-01, -1.060e-02, 8.970e-02, -1.639e-02, -1.489e-01, -1.122e-01, 5.799e-04) * s0_2_2;
	r2 += M4(3.017e-02, 2.016e-01, -1.970e-01, 8.793e-02, 4.450e-02, 1.675e-02, 8.259e-02, 3.083e-02, -4.238e-02, -6.322e-02, 9.851e-02, -3.574e-02, 2.824e-02, 4.470e-02, -3.434e-02, 4.731e-03) * s0_2_2;
	r0 += M4(-2.108e-02, 2.911e-02, 2.632e-02, 1.871e-01, -4.979e-02, -1.455e-01, -1.413e-01, -2.512e-01, 4.410e-02, -4.201e-02, -5.760e-02, -1.064e-01, 7.391e-02, 1.927e-01, -3.078e-02, 4.335e-01) * s1_0_0;
	r1 += M4(4.195e-02, -4.419e-02, 1.592e-01, -2.261e-02, -4.449e-02, 4.438e-02, 1.265e-01, 3.166e-02, 2.796e-02, 2.624e-02, -5.166e-02, -8.511e-02, -2.656e-02, 4.800e-02, 2.017e-02, -3.172e-02) * s1_0_0;
	r2 += M4(-2.163e-02, -1.038e-01, -4.509e-02, -1.135e-01, 1.360e-02, 3.059e-02, -9.060e-02, 4.730e-02, 1.301e-02, -1.568e-02, -1.212e-01, 1.815e-02, 5.126e-02, -4.317e-02, 1.133e-01, 1.177e-01) * s1_0_0;
	r0 += M4(-5.582e-02, 1.152e-02, -1.139e-01, 1.919e-01, -2.163e-01, -6.089e-02, -1.451e-01, -2.555e-01, 4.968e-02, 2.884e-02, 4.497e-02, 7.087e-02, 1.478e-01, 1.106e-01, 6.015e-03, 3.249e-01) * s1_0_1;
	r1 += M4(-6.558e-02, 1.866e-01, 1.675e-01, -5.338e-02, -6.798e-02, 1.710e-01, -2.712e-01, 1.827e-02, 8.701e-02, -4.016e-02, -8.305e-03, -1.322e-02, 7.316e-02, -5.813e-02, 2.374e-02, -1.165e-01) * s1_0_1;
	r2 += M4(2.700e-02, -2.521e-02, -3.700e-02, 1.733e-01, 4.059e-02, -1.000e+00, -1.073e-01, -1.617e-01, 7.723e-02, -1.821e-02, -8.960e-02, -7.397e-02, -4.659e-02, 1.545e-01, 1.338e-01, 2.611e-01) * s1_0_1;
	r0 += M4(1.036e-01, 4.303e-02, 4.097e-02, 1.176e-01, -1.040e-01, 1.726e-02, -5.886e-02, 1.488e-01, -6.550e-03, -4.774e-02, 4.007e-03, 2.566e-02, 1.398e-01, 1.607e-01, 1.197e-01, 4.845e-01) * s1_0_2;
	r1 += M4(1.319e-02, -5.816e-02, -1.750e-02, 8.377e-02, -1.243e-02, -1.340e-02, -6.460e-02, -6.646e-02, 3.916e-02, -6.172e-02, -3.254e-02, -3.981e-03, 1.597e-02, 4.897e-03, 2.782e-02, -5.749e-02) * s1_0_2;
	r2 += M4(1.729e-02, -9.733e-02, 2.121e-02, -6.778e-03, -4.760e-02, 2.496e-01, -1.449e-01, 8.999e-02, 2.497e-02, 1.202e-01, 2.808e-02, 1.054e-01, -3.974e-02, 3.904e-02, 1.582e-01, 1.398e-01) * s1_0_2;
	r0 += M4(1.657e-02, -4.458e-02, 6.727e-02, 3.136e-01, 4.629e-02, 2.259e-01, 8.093e-02, -1.519e-01, 4.649e-02, 4.950e-02, -2.457e-02, -9.009e-02, 2.142e-01, 3.132e-01, 5.841e-04, 8.480e-01) * s1_1_0;
	r1 += M4(4.108e-02, 3.036e-02, 8.509e-02, 9.319e-02, -2.411e-02, -2.704e-01, 2.384e-01, -1.343e-02, 7.470e-02, -6.588e-04, -6.715e-02, -6.242e-02, 1.676e-02, -1.739e-01, 9.051e-02, -6.469e-03) * s1_1_0;
	r2 += M4(-4.987e-02, -1.168e-01, -5.416e-02, 2.143e-02, 4.544e-02, 3.199e-02, -9.906e-02, -1.042e-01, 6.939e-03, 6.084e-02, -7.748e-02, 8.428e-02, -2.644e-02, 3.488e-04, 2.135e-01, 1.670e-01) * s1_1_0;
	r0 += M4(3.350e-02, 6.381e-02, -1.291e-02, 3.189e-01, 6.895e-01, 9.769e-02, -3.931e-01, -2.654e-01, 2.854e-01, 7.398e-02, 1.039e-01, -6.767e-01, 3.314e-01, 3.965e-01, 4.756e-01, 9.040e-01) * s1_1_1;
	r1 += M4(6.892e-02, 7.797e-02, 2.030e-01, -2.219e-02, -6.626e-01, -7.296e-01, -9.695e-01, -1.063e-01, 8.567e-02, 1.668e-01, -3.782e-02, -7.494e-02, 2.018e-01, -3.499e-02, -1.780e-02, 4.807e-02) * s1_1_1;
	r2 += M4(5.333e-02, 3.523e-01, -1.944e-01, 2.358e-01, -1.190e-01, -2.856e-01, 5.479e-01, 1.061e-01, 7.580e-02, -5.007e-02, -3.727e-01, -1.256e-01, 4.702e-02, -1.690e-01, 2.726e-01, 2.823e-01) * s1_1_1;
	r0 += M4(1.341e-01, 7.804e-02, 4.471e-02, 2.013e-01, 8.329e-02, -4.528e-02, -4.466e-02, -5.360e-01, -8.283e-02, -1.630e-01, -1.058e-01, -8.543e-01, 3.014e-01, 1.758e-01, 7.539e-02, 3.998e-01) * s1_1_2;
	r1 += M4(2.667e-02, 2.701e-01, 3.567e-02, 1.647e-01, 5.923e-04, 2.984e-01, -2.736e-01, -1.685e-01, 8.928e-03, 2.622e-01, -2.086e-01, -2.822e-01, -5.777e-02, -7.598e-02, -6.664e-02, -6.729e-02) * s1_1_2;
	r2 += M4(7.402e-03, -5.156e-02, -4.214e-02, -7.944e-02, -8.093e-03, -1.643e-01, 3.781e-02, -1.322e-02, 1.790e-01, 2.291e-02, -1.808e-01, -4.481e-02, 7.669e-02, -4.745e-02, 9.611e-02, 7.322e-02) * s1_1_2;
	r0 += M4(-3.258e-02, -2.227e-03, 1.093e-02, -2.815e-01, -1.375e-01, 3.974e-02, -4.012e-02, -1.699e-01, -5.297e-03, 2.682e-02, -2.879e-02, 1.727e-01, 1.875e-01, 2.656e-01, 8.331e-02, 7.276e-01) * s1_2_0;
	r1 += M4(-1.085e-02, -1.964e-01, 6.626e-02, -9.093e-02, -3.603e-02, -1.986e-01, 1.096e-01, -9.827e-02, -8.917e-03, 4.834e-03, -4.515e-02, -4.194e-02, 4.974e-02, -3.432e-02, 8.658e-02, -6.468e-02) * s1_2_0;
	r2 += M4(1.290e-02, -1.180e-01, -4.988e-02, -4.278e-02, 5.898e-02, 1.481e-01, -1.144e-01, 1.127e-03, 3.605e-03, 2.354e-02, -6.692e-02, -3.784e-03, 6.749e-02, -1.134e-01, 8.347e-02, 1.203e-01) * s1_2_0;
	r0 += M4(2.322e-02, 8.847e-02, 6.432e-02, 1.401e-01, -2.540e-02, -8.838e-02, -4.901e-02, -1.119e-01, -1.046e-02, 1.820e-02, 3.526e-02, 9.203e-03, 1.950e-01, 2.444e-01, 2.048e-01, 9.386e-01) * s1_2_1;
	r1 += M4(-1.039e-01, 3.093e-02, -8.702e-02, -1.188e-01, 4.819e-02, -3.178e-02, 7.515e-02, -2.349e-01, 1.317e-01, -1.164e-01, 3.642e-02, -5.685e-02, 2.636e-02, -4.932e-01, -1.062e-02, 6.992e-02) * s1_2_1;
	r2 += M4(-2.162e-02, -2.863e-02, -1.945e-01, -6.100e-03, 1.142e-01, 5.428e-02, 1.216e-01, -4.402e-02, 1.532e-01, -2.315e-01, -2.277e-01, -7.774e-02, -4.606e-03, 1.950e-02, 1.589e-01, 1.841e-01) * s1_2_1;
	r0 += M4(-2.804e-02, -2.117e-02, -5.729e-02, 2.634e-01, -1.297e-01, -5.431e-02, 6.721e-02, -1.833e-01, -6.236e-02, -9.952e-02, -6.416e-02, -5.776e-01, 1.227e-01, 1.502e-01, 1.573e-01, 5.378e-01) * s1_2_2;
	r1 += M4(-7.064e-02, 6.063e-02, 9.205e-02, -7.909e-03, 4.823e-02, 2.771e-01, 5.738e-02, -8.459e-02, -3.867e-02, 2.850e-02, -3.981e-02, -1.418e-01, -2.217e-02, -1.539e-01, -1.023e-01, 3.619e-02) * s1_2_2;
	r2 += M4(-5.148e-02, -9.707e-02, -2.065e-01, 3.869e-02, -6.914e-02, -9.249e-02, -1.083e-01, -1.833e-02, -1.204e-01, -4.323e-02, -1.438e-01, -2.397e-02, 8.925e-03, 2.166e-02, 3.591e-02, 1.607e-01) * s1_2_2;
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	r0 += M4(-7.874e-02, -2.111e-02, -4.337e-02, -9.238e-03, 1.051e-01, 1.674e-01, 3.787e-02, 2.578e-01, -3.029e-02, 5.677e-03, -3.617e-02, 6.603e-04, 2.573e-02, -5.811e-02, -9.445e-03, -2.385e-01) * s0_0_0;
	r1 += M4(-2.092e-02, 1.206e-01, -4.067e-02, -8.202e-03, 1.072e-01, -3.076e-02, -3.190e-01, 1.116e-01, -3.514e-02, -4.012e-02, -1.803e-02, 2.703e-02, 1.131e-01, 1.468e-01, 2.437e-01, -8.235e-02) * s0_0_0;
	r2 += M4(4.030e-02, 7.216e-02, 9.574e-02, -1.441e-02, -5.162e-02, -4.325e-02, 1.372e-01, 5.808e-02, -1.106e-02, -1.214e-02, -4.436e-03, -1.387e-02, 9.048e-02, -2.072e-01, -1.965e-01, 9.286e-02) * s0_0_0;
	r0 += M4(3.961e-02, -9.090e-02, -4.128e-02, -4.676e-03, 1.893e-01, -2.753e-02, -3.086e-01, -1.000e+00, -7.686e-02, 3.578e-02, 9.499e-02, 4.968e-01, 5.676e-02, -8.186e-02, -6.236e-02, 1.358e-01) * s0_0_1;
	r1 += M4(-1.479e-02, 3.414e-01, 2.206e-02, 1.442e-01, -8.036e-02, -2.713e-01, -2.537e-02, -1.597e-01, 1.597e-02, 7.045e-02, -7.837e-02, -1.381e-01, -8.257e-02, -1.591e-02, -1.278e-02, 5.371e-03) * s0_0_1;
	r2 += M4(6.199e-02, -2.213e-01, 5.357e-02, -1.256e-01, 1.053e-01, 1.000e+00, 2.112e-01, 8.375e-01, -1.611e-01, -1.560e-01, 2.938e-02, 1.673e-01, -8.856e-02, -1.171e-01, -1.849e-01, -7.828e-02) * s0_0_1;
	r0 += M4(-6.941e-02, -5.584e-02, 1.038e-02, -1.643e-01, 5.715e-02, -3.784e-02, 1.359e-02, -5.268e-01, -1.146e-01, 1.377e-02, -1.416e-01, 7.527e-02, -6.016e-02, -4.821e-03, 8.661e-03, 8.874e-02) * s0_0_2;
	r1 += M4(-1.054e-01, 2.426e-02, -3.931e-02, 1.618e-01, -6.081e-02, -2.486e-02, -2.068e-01, 2.027e-02, 9.820e-02, 8.038e-02, 3.390e-02, -1.737e-01, -6.460e-02, -3.379e-02, 3.662e-02, 2.092e-02) * s0_0_2;
	r2 += M4(-5.809e-02, -2.321e-02, 9.843e-02, -9.732e-02, 2.715e-02, -9.951e-02, -5.871e-03, -1.519e-01, 3.773e-02, 1.611e-01, -5.256e-02, 2.993e-01, 1.805e-02, 2.943e-02, 8.834e-02, 5.958e-02) * s0_0_2;
	r0 += M4(2.510e-02, 7.117e-02, -8.251e-03, 9.804e-04, 6.614e-02, 2.377e-01, 8.564e-02, 3.186e-01, 4.997e-02, -6.242e-02, -1.591e-02, 2.973e-01, 4.171e-01, 4.250e-01, 1.913e-01, -1.000e+00) * s0_1_0;
	r1 += M4(4.958e-02, -1.087e-01, 1.323e-01, -4.585e-02, 6.641e-02, 1.825e-01, 2.680e-03, 1.566e-01, 8.625e-02, 1.379e-01, -1.094e-01, -1.230e-02, -1.238e-01, 7.009e-02, -4.951e-01, -5.352e-01) * s0_1_0;
	r2 += M4(-5.590e-02, 4.565e-02, 1.042e-01, 3.151e-02, -6.004e-02, -1.199e-01, 5.003e-02, -1.112e-01, -2.449e-02, -1.091e-01, -4.235e-02, 3.233e-05, -3.571e-02, 3.739e-01, 2.805e-01, -2.878e-01) * s0_1_0;
	r0 += M4(-4.199e-02, -1.865e-01, -2.263e-01, 9.321e-02, -1.764e-01, 2.108e-01, -1.457e-01, -7.892e-02, -2.001e-01, -1.000e+00, 1.466e-01, -3.610e-01, -3.222e-01, -1.287e-01, 1.053e-01, 6.523e-02) * s0_1_1;
	r1 += M4(-9.562e-03, -3.119e-01, 6.791e-02, 1.550e-01, -1.061e-02, -5.752e-01, 1.029e-01, 3.408e-01, 5.722e-01, -2.471e-02, 1.290e-01, -3.968e-02, 2.691e-01, 4.483e-01, 7.538e-01, 3.829e-02) * s0_1_1;
	r2 += M4(1.943e-02, 4.455e-01, 2.631e-01, 1.491e-01, 9.432e-01, 4.604e-03, 2.408e-02, -2.459e-02, 5.794e-01, -1.147e-01, 1.368e-03, 8.635e-02, 2.367e-01, -7.798e-02, -4.529e-02, 1.476e-01) * s0_1_1;
	r0 += M4(-1.479e-01, -3.685e-02, 2.417e-02, -2.653e-01, -6.531e-02, -4.880e-02, -5.682e-02, 9.951e-02, 6.744e-02, 3.017e-02, 3.974e-01, -2.713e-01, 9.489e-02, 2.748e-02, 7.598e-02, 2.172e-01) * s0_1_2;
	r1 += M4(-1.654e-01, -2.718e-02, 7.997e-02, -7.068e-02, 1.899e-01, 1.984e-01, 1.420e-02, 1.051e-01, -1.397e-01, -2.694e-01, 2.726e-02, -1.097e-01, -2.434e-02, -1.537e-01, 1.499e-02, -3.451e-02) * s0_1_2;
	r2 += M4(-5.256e-02, 2.070e-01, 2.569e-01, 8.533e-02, -7.101e-02, 7.671e-03, -1.890e-01, 1.543e-02, 2.108e-01, 2.115e-01, 4.819e-01, 2.053e-01, -1.846e-02, 9.280e-02, 1.076e-01, 5.682e-02) * s0_1_2;
	r0 += M4(-6.950e-02, -1.094e-03, 1.382e-01, 1.617e-01, 7.724e-02, 6.691e-02, 5.859e-02, 3.090e-02, -1.062e-02, 2.055e-02, -3.670e-03, -1.213e-01, 6.939e-03, 3.340e-02, -3.381e-02, -4.793e-01) * s0_2_0;
	r1 += M4(-1.440e-01, -1.287e-01, 2.203e-01, 1.247e-01, -2.034e-02, -1.830e-02, -4.571e-02, 3.402e-02, 8.837e-02, -1.618e-02, -1.429e-02, -8.247e-02, 8.439e-02, -3.976e-01, 2.977e-03, -3.662e-01) * s0_2_0;
	r2 += M4(7.904e-02, 5.752e-02, 2.023e-01, 4.444e-02, -9.842e-02, -1.032e-01, 4.353e-02, -2.248e-02, -3.571e-02, -4.746e-02, -3.989e-02, 1.934e-02, -5.257e-01, 4.747e-02, 3.180e-02, 2.888e-02) * s0_2_0;
	r0 += M4(-1.758e-03, -2.086e-03, -1.464e-01, 3.593e-01, 6.441e-02, -7.233e-03, 5.975e-02, 2.113e-01, -1.021e-01, 4.562e-02, -4.268e-02, 4.740e-02, -4.170e-02, -7.264e-02, 1.891e-01, 6.187e-02) * s0_2_1;
	r1 += M4(-7.300e-02, 9.651e-02, 1.378e-01, 2.710e-01, -1.323e-01, 6.776e-02, 2.430e-02, 7.785e-02, 1.276e-01, -9.370e-02, -1.950e-01, -3.302e-01, -1.296e-01, 5.222e-02, 1.162e-01, 1.198e-01) * s0_2_1;
	r2 += M4(-2.039e-01, 7.977e-02, 2.271e-01, -2.143e-03, -3.176e-02, 8.410e-02, 8.655e-02, 8.511e-02, -2.655e-01, 9.320e-02, -4.212e-02, -1.224e-01, -5.120e-01, -9.843e-02, 3.016e-01, -1.357e-01) * s0_2_1;
	r0 += M4(1.054e-01, 6.098e-02, 4.531e-02, 6.176e-02, 4.196e-02, 4.105e-02, 2.156e-02, 1.535e-01, -1.538e-01, 2.185e-03, 1.244e-01, -3.191e-01, 5.016e-03, 2.571e-03, 5.531e-02, -3.821e-02) * s0_2_2;
	r1 += M4(-1.668e-01, -1.375e-01, 5.376e-02, 7.044e-02, -2.907e-02, -1.126e-01, -2.214e-02, 7.779e-02, 1.735e-02, -2.525e-02, 1.294e-01, 2.258e-02, -9.591e-02, 8.285e-02, 5.663e-02, 3.935e-03) * s0_2_2;
	r2 += M4(-1.500e-01, 1.974e-01, 1.852e-01, 1.288e-01, -3.386e-02, -6.428e-02, 8.014e-02, 1.513e-04, 9.352e-02, -1.284e-01, -7.211e-02, 3.641e-02, 6.673e-02, 5.218e-02, 4.077e-02, 2.395e-02) * s0_2_2;
	r0 += V4(6.578e-03, 2.212e-02, -1.035e-02, 2.020e-02);
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(-1.911e-02, -1.805e-02, 2.739e-03, 7.450e-03);
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(-1.128e-02, -2.384e-02, -4.026e-03, -7.107e-03);
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_4x12_DS] -conv3
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND conv2
//!BIND LUMA
//!SAVE conv3
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
// Working types: 16-bit float scalar/vector/matrix when the explicit float16
// arithmetic extension is defined, the ordinary 32-bit types otherwise.
#if defined(GL_EXT_shader_explicit_arithmetic_types_float16)
#	define F float16_t
#	define V4 f16vec4
#	define M4 f16mat4
#else
#	define F float
#	define V4 vec4
#	define M4 mat4
#endif
// CUNNY_FETCH(x, y, k): reads texel k (0, 1 or 2) of the three conv2 texels
// stored side by side for position pos + (x, y); that position is clamped to
// [0, sz] before being scaled by (3, 1). The macro expands inside hook() and
// relies on hook()'s locals `pos` and `sz`.
#define CUNNY_FETCH(x, y, k) V4(conv2_mul * texelFetch(conv2_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(k, 0), 0))
#define l0(x, y) CUNNY_FETCH(x, y, 0)
#define l1(x, y) CUNNY_FETCH(x, y, 1)
#define l2(x, y) CUNNY_FETCH(x, y, 2)
// Workgroup-shared tile: 3 planes of 10x10 V4 entries.
shared V4 G[3][10][10];
// Entry point of the conv3 pass.
//
// Each invocation handles one position `pos` and produces three 4-component
// results (r0, r1, r2), stored side by side at opos = pos * (3, 1).
// Steps:
//   1. The workgroup cooperatively copies a 10x10 window of each of the three
//      input planes (l0, l1, l2) into the shared tile G.
//   2. After the barrier, every invocation reads the 3x3 neighbourhood around
//      its own position from each plane.
//   3. Each neighbour vector is multiplied by a constant 4x4 matrix and summed
//      into r0/r1/r2; a constant vector is added, the result is clamped to
//      [0, 1] and written with imageStore.
// The statement order of the accumulation is kept as generated; with reduced
// precision types a different summation order could change rounding.
void hook() {
	// Index of this invocation inside its workgroup.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	// Position handled by this invocation (workgroups advance in 8x8 steps).
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	// Output coordinate: three texels are written per position, hence the x3.
	ivec2 opos = pos * ivec2(3, 1);
	// Largest valid position; used by the l0/l1/l2 macros to clamp fetches.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Fill the shared tile. Offsets step by 8, so an invocation writes up to
	// four entries per plane (only indices 0..9 are accepted). The -1 shift
	// makes the tile start one texel up/left of the workgroup's first position.
	// NOTE(review): this covers all 10x10 entries only if the workgroup has
	// 8x8 invocations, as the COMPUTE directive suggests - confirm.
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	// Every tile write must be finished before any invocation reads entries
	// written by its neighbours.
	barrier();
	// sP_R_C: neighbour at tile row xy.y+R, column xy.x+C, held in register set P.
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	// Accumulators for the three output texels.
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	// Load the 3x3 neighbourhood of plane 0 into s0_* and of plane 1 into s1_*.
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// Plane 0 and plane 1 contributions: one constant matrix per
	// (neighbour, output texel) pair.
	r0 += M4(-6.571e-02, -1.635e-02, 1.087e-02, 1.203e-01, -8.922e-02, -1.079e-02, 2.274e-02, -5.612e-02, 4.248e-02, 6.164e-02, -3.989e-02, -1.481e-01, 1.135e-01, -3.410e-02, -2.666e-02, 1.294e-01) * s0_0_0;
	r1 += M4(-3.123e-02, 1.145e-01, -1.130e-01, -2.734e-02, -5.569e-02, 3.812e-03, 9.019e-02, 3.485e-02, -3.752e-02, -2.355e-02, -1.452e-02, -7.719e-02, 4.908e-02, -7.451e-02, -4.056e-02, -2.943e-02) * s0_0_0;
	r2 += M4(1.367e-01, -1.928e-01, 1.255e-01, 2.812e-02, 7.311e-02, 1.638e-01, 1.747e-02, -1.765e-02, -1.705e-01, -6.211e-02, -3.299e-02, 8.819e-02, -4.990e-03, 3.578e-02, 7.195e-02, 7.362e-02) * s0_0_0;
	r0 += M4(-3.060e-02, -9.548e-02, 9.584e-02, 8.403e-03, 2.870e-02, 1.398e-01, -5.226e-02, -1.058e-01, 9.650e-02, -1.401e-01, -9.952e-02, -1.788e-01, 2.342e-01, -5.718e-02, -3.815e-01, 2.202e-01) * s0_0_1;
	r1 += M4(-9.563e-02, 3.682e-02, -1.092e-01, -2.579e-01, -3.604e-02, -7.514e-02, 2.530e-01, 2.669e-02, 7.333e-02, 8.371e-02, -1.616e-01, -9.809e-02, 9.999e-02, -8.393e-02, -3.790e-02, -2.964e-02) * s0_0_1;
	r2 += M4(7.533e-02, -1.539e-01, 1.810e-01, 2.354e-02, -8.667e-02, 4.425e-02, -2.470e-01, -2.690e-02, 3.179e-02, -1.184e-01, 3.905e-01, 3.929e-02, -9.837e-02, 2.292e-01, 1.104e-01, 6.313e-02) * s0_0_1;
	r0 += M4(-5.130e-02, -3.404e-02, 6.552e-02, -2.717e-02, 1.632e-01, 8.875e-02, -7.153e-02, -5.024e-02, -1.430e-02, -1.216e-02, -1.199e-01, -4.525e-02, 8.230e-02, -3.789e-02, -1.557e-01, 1.577e-01) * s0_0_2;
	r1 += M4(3.799e-02, 5.268e-02, -6.426e-03, 8.854e-02, 6.823e-02, 2.338e-02, -4.227e-02, -8.247e-02, 5.697e-02, -8.390e-02, 2.842e-02, 2.786e-02, 9.725e-03, 6.695e-02, -2.357e-02, -6.190e-03) * s0_0_2;
	r2 += M4(-5.576e-02, -3.142e-02, 4.878e-02, 2.587e-02, 1.700e-01, 9.637e-02, -9.107e-02, -1.956e-02, 1.703e-03, -1.811e-01, 1.309e-02, 1.521e-02, 9.030e-03, 8.318e-02, 2.497e-02, 2.145e-02) * s0_0_2;
	r0 += M4(2.969e-03, -1.411e-01, -9.300e-02, 9.159e-02, 5.824e-03, 6.210e-02, -6.946e-02, -1.200e-01, -1.436e-01, 2.241e-02, 1.062e-01, -9.580e-04, 1.618e-01, 4.575e-02, 1.164e-02, 2.028e-01) * s0_1_0;
	r1 += M4(6.820e-02, -1.477e-01, -7.501e-02, 2.828e-02, 3.811e-03, -1.463e-02, 9.033e-02, -3.089e-02, 1.607e-01, -6.245e-02, 2.222e-02, 2.608e-01, -4.007e-02, -1.060e-01, -1.311e-02, -1.199e-01) * s0_1_0;
	r2 += M4(9.149e-02, -7.769e-02, 3.141e-01, 1.211e-01, -8.357e-02, 7.215e-03, -1.702e-01, 3.372e-02, 1.492e-01, 3.962e-02, 8.428e-02, -2.108e-01, -9.586e-02, 1.343e-01, 6.508e-02, 1.699e-01) * s0_1_0;
	r0 += M4(-3.472e-01, -4.634e-01, -3.486e-01, -2.776e-01, 9.281e-02, 4.943e-01, 1.776e-01, -3.580e-01, -8.561e-01, 5.730e-02, 1.507e-01, -4.703e-01, 2.143e-01, -8.464e-03, -1.479e-01, 2.458e-01) * s0_1_1;
	r1 += M4(-5.488e-01, 7.351e-02, -6.321e-02, -7.403e-01, 3.314e-01, 2.866e-01, 3.248e-01, 2.999e-01, -1.073e-01, -2.740e-01, 2.151e-01, 3.214e-01, -1.287e-02, -2.203e-01, 3.858e-03, -1.482e-04) * s0_1_1;
	r2 += M4(-8.537e-01, -1.061e-01, 3.803e-01, 1.408e-01, 1.841e-01, 3.426e-01, -8.105e-01, 1.085e-01, 4.859e-01, -3.545e-01, 3.821e-01, 7.369e-02, 4.722e-02, 3.414e-02, 1.461e-01, 3.477e-02) * s0_1_1;
	r0 += M4(6.320e-02, 9.377e-02, -1.969e-02, 1.859e-02, -3.037e-01, 1.189e-01, 4.443e-02, -4.888e-01, -1.460e-02, 8.881e-02, 2.140e-01, 6.129e-02, 2.540e-01, -3.723e-02, -1.372e-01, 2.431e-01) * s0_1_2;
	r1 += M4(-2.598e-02, 7.172e-02, 3.359e-02, -1.425e-01, -1.651e-01, -4.620e-01, 1.518e-01, -1.422e-01, 7.717e-02, -2.698e-02, 1.645e-02, -5.967e-02, 4.064e-02, 9.200e-02, -1.477e-02, -6.824e-03) * s0_1_2;
	r2 += M4(-1.273e-01, 2.466e-01, 6.903e-02, 1.691e-02, 4.197e-03, -4.391e-01, -1.844e-01, -3.356e-02, -1.372e-01, 3.862e-03, -7.712e-02, 1.234e-01, 1.894e-02, 1.697e-01, 3.028e-03, 1.113e-01) * s0_1_2;
	r0 += M4(-3.603e-02, -2.969e-02, 3.172e-02, 5.587e-02, 1.185e-02, -5.760e-02, 2.490e-02, -5.640e-02, 6.079e-02, 5.660e-02, -7.104e-02, -1.113e-02, 4.699e-02, -9.889e-03, 1.895e-02, 7.506e-02) * s0_2_0;
	r1 += M4(-2.272e-02, -1.275e-02, -4.092e-02, -7.409e-02, -6.990e-02, 9.111e-02, 1.154e-02, 3.770e-02, -1.164e-01, 1.024e-02, 1.100e-02, -1.029e-01, -4.816e-02, -1.575e-02, -1.013e-02, 3.777e-02) * s0_2_0;
	r2 += M4(-2.395e-01, 3.035e-02, 5.665e-02, 2.152e-03, 1.087e-01, -1.351e-03, -1.462e-02, -4.360e-02, 7.639e-02, 4.626e-02, 1.071e-01, 1.549e-01, -2.938e-02, 2.243e-02, 3.402e-02, 1.084e-01) * s0_2_0;
	r0 += M4(-1.932e-01, -3.509e-01, -5.284e-02, -1.214e-01, 1.258e-01, 2.685e-01, -2.196e-02, -7.249e-02, -4.367e-02, 2.521e-02, -1.178e-01, -1.496e-01, 1.048e-01, -6.953e-02, 6.824e-02, 2.134e-01) * s0_2_1;
	r1 += M4(2.655e-02, 3.718e-02, 6.143e-02, 2.214e-03, 3.066e-03, 1.177e-02, -3.182e-02, -8.189e-02, 9.510e-02, -1.415e-01, 8.357e-02, -2.748e-02, -1.016e-01, -1.626e-02, -5.548e-02, -1.110e-01) * s0_2_1;
	r2 += M4(-3.196e-01, 1.930e-01, 1.014e-02, -9.728e-02, 1.276e-01, -9.635e-03, -2.034e-01, 2.183e-01, -3.095e-03, -1.267e-01, 1.264e-01, -1.835e-03, 7.450e-02, 2.002e-01, 1.042e-01, 1.575e-01) * s0_2_1;
	r0 += M4(-1.057e-01, 3.904e-02, -3.574e-02, -1.043e-01, 8.095e-02, 2.117e-01, -6.186e-02, -8.107e-02, 6.442e-02, -4.902e-02, -5.130e-02, 9.020e-02, 7.993e-02, -6.545e-02, 3.547e-02, 9.029e-02) * s0_2_2;
	r1 += M4(7.889e-02, 1.007e-01, -2.464e-02, -2.385e-02, 1.337e-01, 2.406e-02, 6.277e-02, 1.498e-01, -3.084e-02, -3.931e-02, 7.270e-02, -7.416e-02, -1.021e-01, 3.838e-02, -7.874e-03, -4.042e-03) * s0_2_2;
	r2 += M4(2.864e-02, -7.515e-02, -1.721e-02, 2.157e-02, -2.143e-02, -2.025e-01, 5.969e-02, 5.958e-02, 4.659e-02, 8.709e-02, 4.791e-02, 6.518e-02, 7.602e-02, 1.316e-01, 1.867e-02, 3.419e-02) * s0_2_2;
	r0 += M4(-1.531e-01, -3.851e-02, 9.130e-02, 1.825e-02, 1.014e-01, 4.709e-02, 1.038e-01, 1.009e-01, -1.108e-01, 9.454e-02, 8.354e-02, -3.711e-02, -1.393e-01, -5.198e-03, -7.335e-02, -6.898e-02) * s1_0_0;
	r1 += M4(1.912e-02, -4.122e-02, 5.806e-03, 7.869e-02, -6.633e-04, 9.786e-02, -2.600e-02, 2.404e-01, 6.780e-02, 1.406e-01, 2.539e-02, 1.001e-01, -5.507e-02, 2.949e-02, 2.741e-02, 6.730e-02) * s1_0_0;
	r2 += M4(6.212e-02, 7.691e-02, -1.399e-02, -8.741e-02, -1.523e-01, 5.974e-02, 7.509e-02, 7.180e-02, 8.441e-03, -2.988e-02, -8.749e-02, -2.478e-02, 1.703e-01, -8.933e-02, 1.143e-01, -4.609e-02) * s1_0_0;
	r0 += M4(-6.822e-02, -2.298e-02, -1.272e-01, 1.302e-01, 3.430e-01, 1.201e-02, 1.080e-02, 1.080e-01, -7.931e-02, 5.218e-02, -1.118e-02, 3.861e-02, 1.216e-01, 3.227e-02, -1.267e-01, 1.551e-01) * s1_0_1;
	r1 += M4(-2.021e-01, 1.219e-02, 7.466e-02, 9.507e-02, 6.098e-02, -2.050e-01, -5.530e-02, 2.142e-01, 7.508e-03, -5.669e-02, 3.259e-02, 2.034e-01, 2.451e-01, -2.983e-01, 1.620e-01, 6.274e-02) * s1_0_1;
	r2 += M4(-4.357e-02, 2.222e-01, -1.584e-01, -1.051e-01, -3.057e-01, 3.649e-02, -2.464e-02, 1.233e-01, -7.530e-02, -2.272e-01, -2.093e-01, -9.365e-03, -7.656e-02, -1.035e-01, -2.530e-01, 8.468e-02) * s1_0_1;
	r0 += M4(-1.207e-01, 1.102e-02, -4.413e-02, -2.456e-02, -4.614e-02, 2.905e-02, -2.370e-01, 1.517e-02, -7.538e-02, 6.086e-02, 4.377e-02, -1.126e-01, -2.607e-02, -4.121e-02, -1.010e-01, -6.066e-02) * s1_0_2;
	r1 += M4(-1.747e-01, 8.511e-02, 1.441e-02, -9.295e-02, 5.347e-02, 3.757e-02, 4.146e-02, -1.568e-02, 4.941e-03, -4.106e-02, -2.424e-02, 1.055e-01, 1.765e-01, -3.127e-02, 2.340e-02, 5.822e-02) * s1_0_2;
	r2 += M4(1.151e-01, 8.877e-02, -5.249e-02, -2.519e-02, -9.886e-02, -1.165e-01, -1.656e-02, 8.645e-02, 3.149e-03, -1.826e-01, -1.167e-01, -1.076e-01, -1.539e-01, 2.433e-02, -8.365e-02, 2.826e-02) * s1_0_2;
	r0 += M4(3.622e-02, -1.820e-02, -8.386e-02, -7.555e-02, -2.469e-02, -2.007e-01, -5.044e-04, 3.462e-02, 2.175e-01, 1.298e-01, -2.134e-01, 1.950e-01, 9.291e-02, -1.787e-01, 1.648e-02, 3.307e-02) * s1_1_0;
	r1 += M4(1.410e-02, -7.886e-02, 1.023e-01, -1.319e-01, -3.583e-02, 9.382e-02, -7.898e-02, 1.576e-01, 5.183e-02, 1.622e-01, 2.920e-01, -2.688e-01, 7.308e-02, -1.406e-01, 1.230e-01, -2.172e-01) * s1_1_0;
	r2 += M4(-2.250e-01, 3.233e-03, -6.333e-02, 5.691e-03, 1.042e-01, 5.538e-03, 2.144e-01, -7.580e-02, -1.447e-01, -2.048e-02, -2.192e-01, 1.549e-01, -1.768e-01, -1.814e-02, -9.143e-02, 2.189e-01) * s1_1_0;
	r0 += M4(3.912e-01, 5.254e-01, 4.347e-01, 7.754e-01, 3.755e-01, 3.874e-01, 2.410e-01, 3.706e-02, 2.611e-01, -5.886e-02, -2.346e-01, 1.090e-01, 2.425e-01, -2.732e-01, 1.688e-01, 4.665e-01) * s1_1_1;
	r1 += M4(4.238e-01, 1.920e-01, -1.246e-01, 1.340e-01, -2.405e-02, 9.497e-02, 7.836e-02, -1.531e-01, -5.331e-01, 1.000e+00, -4.358e-01, 3.811e-01, 6.608e-02, 4.230e-01, -2.510e-01, -5.083e-01) * s1_1_1;
	r2 += M4(-1.519e-01, 5.991e-01, -6.818e-01, -6.293e-02, 8.797e-02, -1.256e-01, -1.062e-01, 3.485e-02, 5.168e-02, 1.785e-01, 2.261e-01, 2.036e-01, 1.178e-01, 3.762e-01, -2.677e-01, 4.082e-01) * s1_1_1;
	r0 += M4(-9.954e-02, -1.521e-01, -2.235e-02, 1.486e-01, -2.130e-02, -1.547e-01, 2.026e-01, 1.709e-01, 5.687e-02, 1.941e-01, 1.172e-01, -9.154e-03, -1.961e-01, -1.287e-01, 7.318e-02, -8.294e-02) * s1_1_2;
	r1 += M4(-6.762e-02, 1.524e-01, -3.440e-02, 1.124e-01, -8.999e-02, 7.224e-02, -1.791e-01, 1.907e-01, 4.970e-02, -4.017e-02, 7.227e-02, -5.741e-02, -3.097e-01, 7.769e-02, -1.032e-01, 2.422e-02) * s1_1_2;
	r2 += M4(3.491e-02, 3.201e-01, -1.295e-01, -1.770e-01, 2.257e-01, 3.350e-01, 2.810e-01, 3.078e-02, -1.600e-01, 9.548e-02, -8.011e-02, -1.770e-02, 3.421e-01, 1.561e-01, 1.668e-01, -1.349e-01) * s1_1_2;
	r0 += M4(-1.148e-01, -7.637e-02, 1.894e-02, -5.165e-02, 1.138e-01, -5.505e-02, 1.918e-01, 1.343e-01, 1.179e-01, 4.312e-02, -6.594e-03, 7.008e-02, 5.505e-02, 8.750e-02, 6.022e-02, 8.190e-02) * s1_2_0;
	r1 += M4(-6.520e-02, 8.357e-02, 5.060e-02, -8.566e-02, 6.665e-02, -2.393e-02, -1.194e-03, -3.066e-02, -2.511e-03, 2.253e-01, 4.483e-03, 7.696e-02, 6.863e-02, -8.083e-03, 5.405e-03, -1.319e-02) * s1_2_0;
	r2 += M4(1.159e-01, 4.496e-02, 1.160e-02, 2.368e-01, -7.555e-02, 7.905e-02, -6.112e-02, -6.177e-02, -1.788e-02, -6.374e-03, -1.193e-02, 7.250e-02, -6.666e-03, 2.682e-02, -1.401e-01, 1.186e-01) * s1_2_0;
	r0 += M4(-1.562e-01, 1.480e-01, -1.374e-01, -9.463e-02, 1.090e-01, 1.034e-01, 9.555e-03, -5.030e-02, 3.248e-02, 7.575e-02, 2.202e-02, 1.590e-01, -1.362e-01, -1.113e-01, 1.033e-01, -1.217e-01) * s1_2_1;
	r1 += M4(-1.258e-01, -1.034e-02, -1.524e-02, -2.842e-01, 1.228e-01, -4.894e-02, 5.068e-02, 5.249e-02, -1.190e-02, 4.438e-02, -1.972e-02, -8.807e-02, 1.063e-01, -6.934e-02, -6.811e-02, 8.488e-03) * s1_2_1;
	r2 += M4(1.064e-01, -5.642e-03, -1.480e-01, 1.374e-01, -2.874e-01, -4.856e-02, -1.048e-01, -7.074e-02, 1.979e-01, 2.842e-01, 1.327e-01, -7.848e-02, 4.605e-02, -2.046e-02, 8.130e-03, -1.553e-02) * s1_2_1;
	r0 += M4(-3.610e-02, -7.032e-02, 7.603e-02, 1.942e-02, 1.007e-01, 3.313e-02, 1.057e-01, -2.931e-02, 1.527e-02, 1.101e-01, 4.161e-03, 3.236e-02, -1.130e-01, 2.902e-02, -3.753e-02, -2.356e-01) * s1_2_2;
	r1 += M4(-3.792e-02, 1.144e-01, -1.723e-02, -1.931e-02, 9.857e-02, -2.005e-02, -1.751e-03, 2.125e-01, 8.103e-03, 2.409e-02, -1.737e-02, -9.473e-03, 1.163e-01, -8.704e-02, 3.945e-02, 6.055e-02) * s1_2_2;
	r2 += M4(-1.764e-01, -6.674e-02, -1.558e-01, -6.169e-02, -1.496e-01, -1.793e-01, -3.232e-02, 1.989e-02, 1.171e-01, 2.623e-03, 4.486e-02, 4.865e-02, -1.821e-01, -3.444e-01, -7.107e-02, -1.360e-01) * s1_2_2;
	// The s0_* registers are reused for the 3x3 neighbourhood of plane 2.
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	// Plane 2 contributions.
	r0 += M4(8.009e-02, -7.934e-02, 6.546e-03, -8.520e-02, 1.792e-01, 6.039e-02, 6.283e-02, 6.289e-02, 9.968e-02, 4.279e-02, -7.116e-02, 6.195e-02, -1.870e-01, -4.507e-02, -7.538e-03, -1.630e-01) * s0_0_0;
	r1 += M4(-9.212e-02, -3.585e-01, -5.080e-02, -1.059e-01, 7.603e-02, 9.188e-04, 2.131e-02, 1.248e-01, 1.503e-01, -1.179e-01, 1.528e-01, -3.747e-02, -5.785e-02, -5.673e-03, -6.436e-02, 1.799e-03) * s0_0_0;
	r2 += M4(-1.443e-01, 1.090e-02, 1.876e-01, 1.009e-01, -1.026e-01, -3.250e-03, -1.017e-01, 3.522e-02, -1.736e-01, -1.060e-01, -1.471e-01, 3.302e-02, 6.670e-02, 9.044e-02, 1.715e-01, 1.181e-02) * s0_0_0;
	r0 += M4(-4.198e-01, -2.769e-01, 1.585e-01, -7.136e-01, 7.862e-02, -1.465e-02, 3.042e-02, 1.494e-02, -5.745e-02, -1.173e-01, -7.679e-02, 6.560e-02, -3.024e-02, 4.908e-02, -4.984e-03, -7.865e-02) * s0_0_1;
	r1 += M4(-4.015e-01, 4.211e-02, 1.144e-01, -1.772e-01, 1.287e-02, 2.034e-02, -2.153e-02, 4.005e-02, -1.517e-01, 8.454e-02, -5.674e-02, -1.035e-01, -4.165e-02, -6.077e-02, 5.967e-02, -5.635e-02) * s0_0_1;
	r2 += M4(9.689e-02, 1.890e-01, 4.253e-01, -2.552e-01, 2.204e-02, 9.105e-03, -2.284e-02, -3.355e-02, 1.519e-01, -3.766e-03, 3.976e-02, 6.891e-02, -5.862e-02, -3.081e-02, 1.059e-02, 3.992e-02) * s0_0_1;
	r0 += M4(-1.560e-01, 3.030e-02, 8.382e-02, -1.431e-01, 7.690e-02, 6.707e-02, -8.619e-02, 9.023e-02, 2.430e-02, 1.189e-01, 8.364e-02, -4.918e-02, -8.830e-02, -5.081e-02, -2.508e-02, -4.907e-02) * s0_0_2;
	r1 += M4(-2.457e-02, -2.176e-01, 3.918e-02, -2.062e-01, 1.136e-01, -8.674e-02, 1.364e-02, 7.989e-02, -6.566e-02, 3.477e-02, 2.361e-02, -3.036e-01, -3.431e-02, 5.113e-02, -9.450e-03, 1.932e-02) * s0_0_2;
	r2 += M4(-9.524e-02, 3.370e-02, 4.306e-02, 9.143e-03, 2.112e-02, -3.782e-03, -1.489e-01, -4.275e-02, 1.995e-02, -2.813e-02, -7.165e-02, 1.875e-02, 2.487e-02, -2.899e-02, 1.012e-01, 5.434e-02) * s0_0_2;
	r0 += M4(-1.297e-01, -3.993e-02, -8.045e-02, -1.177e-01, 4.378e-02, -1.950e-01, 9.058e-02, 1.266e-01, -1.579e-01, 8.491e-02, -4.409e-02, -7.008e-02, -1.368e-01, 3.482e-02, -3.275e-02, -1.077e-01) * s0_1_0;
	r1 += M4(-1.845e-01, -7.837e-02, -6.348e-02, -2.131e-01, 7.455e-02, 1.048e-01, 1.287e-02, 8.136e-02, -4.610e-01, 2.258e-01, -1.956e-01, -1.034e-01, 1.455e-01, 2.233e-01, 2.502e-02, 1.963e-01) * s0_1_0;
	r2 += M4(1.573e-02, 7.688e-02, 1.839e-01, 1.744e-01, 1.152e-01, 1.511e-01, 4.650e-03, 9.104e-02, 2.229e-01, 7.459e-03, 7.191e-02, -5.850e-02, 7.367e-02, -1.352e-01, -7.260e-03, -2.647e-01) * s0_1_0;
	r0 += M4(-1.921e-01, 1.568e-01, 1.168e-01, -3.975e-01, -1.586e-01, 2.796e-02, 2.566e-01, -3.644e-01, 1.456e-01, -2.433e-01, 9.000e-02, -1.905e-04, 3.705e-01, 3.447e-01, -3.003e-01, 4.697e-02) * s0_1_1;
	r1 += M4(5.670e-01, 8.665e-02, 2.397e-01, 2.022e-01, 1.643e-01, 5.352e-02, 2.791e-01, 2.417e-01, -1.264e-01, -1.222e-02, -8.376e-02, -3.252e-01, 4.588e-01, -3.465e-01, 6.487e-02, 2.745e-01) * s0_1_1;
	r2 += M4(3.017e-01, -3.976e-01, 3.556e-01, -5.669e-01, 2.376e-01, -2.695e-01, -2.567e-01, 2.327e-02, 5.403e-02, 2.491e-01, 3.453e-01, 1.289e-01, -3.471e-01, -1.489e-01, -5.537e-01, 1.005e-01) * s0_1_1;
	r0 += M4(1.753e-01, 7.701e-02, 6.860e-02, -2.182e-01, 1.364e-01, 6.723e-02, 2.253e-01, 4.379e-02, 2.685e-02, 4.716e-02, -9.507e-02, -3.251e-02, -2.585e-02, -1.604e-01, -1.807e-01, 2.536e-01) * s0_1_2;
	r1 += M4(-7.112e-02, -2.223e-01, 9.873e-03, -3.805e-02, 8.414e-02, -3.060e-02, -1.337e-02, -1.050e-01, 5.683e-02, -1.204e-02, 4.317e-02, -9.708e-02, -1.316e-01, 2.026e-01, -3.553e-02, 2.675e-01) * s0_1_2;
	r2 += M4(6.067e-02, -5.606e-01, 2.076e-01, 8.515e-02, 1.027e-01, -1.655e-01, -1.092e-01, 1.304e-01, 1.906e-03, -3.508e-04, -1.209e-01, -3.270e-02, 1.247e-01, 2.641e-01, 5.683e-02, -2.846e-02) * s0_1_2;
	r0 += M4(-5.352e-02, -6.279e-02, -3.589e-02, -9.864e-02, 4.574e-02, -7.854e-02, -3.967e-02, 2.256e-02, -8.089e-04, -1.900e-01, -3.590e-02, -4.504e-02, 1.049e-01, -5.973e-02, 1.157e-01, -1.664e-02) * s0_2_0;
	r1 += M4(6.925e-02, -1.353e-01, -7.643e-03, -1.133e-02, 3.495e-02, -2.593e-03, 6.994e-02, -2.484e-02, 1.472e-01, 1.146e-02, 6.913e-02, 1.244e-01, 1.877e-01, 1.018e-01, 6.720e-03, 2.454e-01) * s0_2_0;
	r2 += M4(-3.002e-02, -3.357e-02, 1.084e-01, -8.085e-02, 3.422e-02, 5.723e-02, -1.044e-01, 3.333e-02, -1.101e-02, -5.565e-02, -6.310e-02, -3.821e-02, 1.557e-01, -5.482e-02, 1.065e-01, -4.867e-01) * s0_2_0;
	r0 += M4(4.000e-02, 7.537e-02, 1.253e-02, 7.073e-02, 1.398e-01, 2.271e-01, 1.976e-01, 2.552e-01, 1.606e-01, 2.171e-01, -1.704e-01, -2.716e-02, 8.066e-02, -1.513e-01, -1.260e-01, -1.480e-01) * s0_2_1;
	r1 += M4(1.855e-02, 3.550e-03, 2.656e-02, -3.287e-02, 1.832e-01, -1.942e-01, -1.371e-02, 3.153e-02, 6.579e-03, 4.742e-02, 7.640e-03, 1.371e-02, -5.900e-02, -1.524e-01, -3.349e-02, 3.221e-02) * s0_2_1;
	r2 += M4(2.145e-02, 2.586e-02, 2.183e-01, 6.932e-03, 2.959e-01, 2.615e-02, -2.467e-01, 3.613e-01, -1.315e-01, -1.424e-01, -5.837e-02, 1.954e-03, -4.017e-01, -2.803e-01, -2.446e-01, -5.138e-01) * s0_2_1;
	r0 += M4(1.689e-02, -1.865e-02, 4.628e-02, -9.573e-03, 3.221e-02, -8.376e-02, 6.708e-02, 1.353e-01, -1.696e-02, -5.519e-02, 1.615e-02, 2.061e-02, -5.816e-02, -1.181e-02, -1.279e-01, -3.770e-02) * s0_2_2;
	r1 += M4(-1.374e-02, -5.577e-02, 3.281e-02, -5.326e-03, 8.404e-02, -1.214e-01, 7.737e-02, 8.032e-02, -6.279e-02, 6.981e-02, -8.572e-02, -1.667e-02, 3.047e-02, 1.395e-01, -6.001e-02, 4.227e-02) * s0_2_2;
	r2 += M4(1.854e-02, 6.944e-02, 1.481e-01, -3.347e-02, 1.508e-01, 1.575e-01, 1.184e-01, 2.914e-02, 7.326e-02, 7.106e-02, 8.862e-02, 2.759e-02, -2.426e-01, 4.403e-02, -6.061e-02, -1.090e-01) * s0_2_2;
	// Finish each output texel: add its constant offset, clamp to [0, 1] and
	// store it as a 32-bit vec4.
	// NOTE(review): out_image is not declared in this pass; presumably the
	// host supplies it for compute hooks - confirm.
	r0 += V4(-8.696e-03, -1.619e-02, 1.249e-03, 2.223e-02);
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(1.056e-03, -2.117e-02, -1.120e-02, -3.689e-03);
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(-3.018e-02, -1.854e-03, 7.050e-03, -1.472e-02);
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_4x12_DS] -conv4
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND conv3
//!BIND LUMA
//!SAVE conv4
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
// Working types: 16-bit float scalar/vector/matrix when the explicit float16
// arithmetic extension is defined, the ordinary 32-bit types otherwise.
#if defined(GL_EXT_shader_explicit_arithmetic_types_float16)
#	define F float16_t
#	define V4 f16vec4
#	define M4 f16mat4
#else
#	define F float
#	define V4 vec4
#	define M4 mat4
#endif
// CUNNY_FETCH(x, y, k): reads texel k (0, 1 or 2) of the three conv3 texels
// stored side by side for position pos + (x, y); that position is clamped to
// [0, sz] before being scaled by (3, 1). The macro expands inside hook() and
// relies on hook()'s locals `pos` and `sz`.
#define CUNNY_FETCH(x, y, k) V4(conv3_mul * texelFetch(conv3_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(k, 0), 0))
#define l0(x, y) CUNNY_FETCH(x, y, 0)
#define l1(x, y) CUNNY_FETCH(x, y, 1)
#define l2(x, y) CUNNY_FETCH(x, y, 2)
// Workgroup-shared tile: 3 planes of 10x10 V4 entries.
shared V4 G[3][10][10];
void hook() {
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	ivec2 opos = pos * ivec2(3, 1);
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	barrier();
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	r0 += M4(-1.055e-01, 1.389e-01, 1.006e-01, 2.386e-02, -2.141e-02, -6.090e-04, 2.838e-02, -4.458e-02, 1.145e-03, 3.788e-03, 4.335e-02, 1.639e-02, 2.813e-02, -2.073e-01, -7.150e-02, 5.448e-02) * s0_0_0;
	r1 += M4(3.553e-02, 8.595e-02, 2.355e-02, 2.498e-02, -1.988e-02, -6.382e-02, 2.649e-03, 6.763e-02, 1.411e-02, 4.851e-02, -1.439e-02, 4.835e-02, 3.527e-02, -9.975e-02, -1.870e-02, -5.485e-02) * s0_0_0;
	r2 += M4(-8.278e-02, -2.511e-03, 4.932e-04, -3.022e-03, 3.544e-01, -1.714e-03, 5.726e-02, 8.488e-02, -3.069e-02, 2.241e-02, -1.250e-02, -1.007e-01, -2.900e-02, -5.379e-03, -3.558e-02, -9.846e-02) * s0_0_0;
	r0 += M4(-1.386e-01, 2.263e-03, 3.250e-01, 7.186e-02, 7.580e-02, -2.823e-02, -1.042e-01, 5.141e-02, -1.275e-01, 1.498e-01, 1.989e-01, 6.735e-03, -1.929e-01, -3.153e-01, -2.466e-01, 5.126e-02) * s0_0_1;
	r1 += M4(2.599e-01, 1.783e-01, -2.612e-02, 2.213e-02, -8.139e-02, -5.809e-02, -1.806e-02, -1.974e-02, 1.487e-01, 1.035e-01, 5.083e-02, 5.576e-02, -3.679e-01, -9.308e-02, 3.035e-02, -6.769e-02) * s0_0_1;
	r2 += M4(1.567e-02, -4.771e-02, -4.058e-02, -1.080e-01, 1.146e-01, -3.885e-02, 7.252e-02, 9.758e-02, -7.892e-02, 7.824e-03, -3.947e-02, 2.918e-02, 8.578e-04, -3.275e-03, 7.261e-02, -1.136e-01) * s0_0_1;
	r0 += M4(-6.732e-02, 3.933e-02, 1.808e-01, 6.128e-02, 1.830e-01, -8.228e-02, 7.445e-04, -2.033e-02, -2.189e-01, -1.495e-03, 6.506e-03, 2.494e-02, 1.966e-01, 4.375e-02, -3.558e-02, -9.186e-03) * s0_0_2;
	r1 += M4(1.341e-01, 6.469e-02, -3.340e-02, -1.022e-02, -1.557e-01, -6.977e-02, 4.914e-02, -5.205e-02, 6.971e-02, 2.994e-03, -7.389e-02, 1.030e-02, 5.325e-03, -2.465e-02, 8.511e-02, -9.933e-03) * s0_0_2;
	r2 += M4(-2.013e-02, -2.067e-02, 1.398e-02, 2.168e-02, 4.783e-03, -7.195e-02, 7.026e-03, 1.527e-01, 1.658e-02, 1.189e-02, 9.755e-03, -7.148e-02, 1.872e-02, -5.386e-02, 3.724e-02, 8.000e-02) * s0_0_2;
	r0 += M4(-9.122e-02, -5.550e-02, 1.265e-01, 3.657e-02, 3.314e-01, -3.136e-02, 1.287e-02, 1.975e-02, -1.716e-01, 2.017e-01, 7.056e-02, 1.172e-01, -5.052e-02, -1.444e-02, -6.087e-02, -2.475e-01) * s0_1_0;
	r1 += M4(6.715e-02, 3.333e-02, 6.750e-02, 1.294e-01, 7.259e-04, 1.904e-02, -6.832e-02, 6.557e-02, 9.545e-02, 3.831e-01, 4.196e-02, 2.570e-01, -9.636e-03, -8.401e-02, -1.112e-02, -1.317e-01) * s0_1_0;
	r2 += M4(-5.563e-01, -3.221e-02, 2.062e-01, 2.396e-02, 4.419e-01, 6.909e-02, 1.927e-01, 1.044e-01, 1.284e-01, 7.459e-02, 1.614e-01, -3.080e-02, 8.832e-02, 1.469e-03, -4.342e-01, -1.455e-01) * s0_1_0;
	r0 += M4(1.454e-01, 3.927e-03, -1.383e-01, -4.309e-01, 3.427e-01, -9.402e-02, 5.915e-02, -2.998e-01, 2.907e-01, 1.083e-01, 1.727e-01, -6.489e-02, -1.931e-01, -2.111e-01, -5.188e-02, -1.138e-01) * s0_1_1;
	r1 += M4(-2.441e-01, -7.619e-01, -1.353e-01, -1.991e-01, 8.867e-02, 3.698e-02, 5.239e-01, 2.546e-01, 6.729e-01, 3.643e-01, -2.767e-01, -1.092e-01, -3.009e-01, -3.628e-01, -1.839e-01, -7.230e-02) * s0_1_1;
	r2 += M4(-2.213e-01, 1.005e-01, -2.741e-01, 5.141e-03, -1.627e-01, -9.952e-02, -6.991e-02, 1.718e-01, -1.727e-01, 1.166e-03, -2.282e-01, 3.007e-03, 1.104e-02, 8.997e-02, -4.252e-01, -7.918e-02) * s0_1_1;
	r0 += M4(-1.268e-01, -6.835e-02, -5.900e-03, -2.012e-02, 5.081e-01, -8.319e-03, -1.690e-01, -5.992e-02, 7.269e-02, 1.770e-02, 1.154e-01, 2.452e-02, 3.230e-01, 5.403e-02, -1.314e-01, 3.758e-02) * s0_1_2;
	r1 += M4(-4.055e-01, -3.163e-02, -1.398e-01, -1.017e-01, 3.246e-02, -3.143e-02, 2.292e-01, -2.389e-02, 6.515e-02, -6.163e-03, -5.194e-02, 8.182e-02, 6.395e-02, 1.072e-01, -1.008e-02, -3.751e-02) * s0_1_2;
	r2 += M4(8.871e-03, 6.959e-02, -4.462e-03, -2.483e-01, 4.821e-02, -1.220e-01, 1.611e-01, 2.862e-01, 4.890e-02, 3.921e-02, -2.365e-02, -6.887e-02, 5.026e-02, -2.699e-01, 8.523e-02, 2.192e-01) * s0_1_2;
	r0 += M4(-2.503e-01, -3.821e-02, -2.557e-02, -8.098e-02, 1.538e-01, 2.310e-02, 1.535e-02, -4.829e-02, -2.003e-01, -5.446e-02, -6.419e-02, 1.368e-01, -1.291e-03, 3.163e-02, 1.741e-02, 2.281e-02) * s0_2_0;
	r1 += M4(6.001e-03, -3.686e-02, 6.115e-02, 1.965e-02, 1.618e-02, 1.352e-02, 4.782e-03, -3.564e-02, -3.671e-03, -8.570e-02, -1.054e-01, -1.359e-02, 1.334e-02, 4.516e-02, 1.233e-02, 2.730e-02) * s0_2_0;
	r2 += M4(4.358e-02, 7.434e-03, 4.942e-02, -5.887e-03, 1.093e-02, -1.435e-02, 3.376e-02, -1.830e-03, -6.330e-02, 1.038e-01, 9.705e-04, -4.573e-03, 4.469e-03, -4.550e-02, -7.658e-02, 4.769e-02) * s0_2_0;
	r0 += M4(-8.940e-01, 5.981e-03, 1.316e-01, -3.354e-01, 5.262e-01, 1.479e-02, -1.083e-01, 1.105e-01, 7.464e-02, -2.130e-02, 1.850e-01, 4.496e-01, 3.513e-01, 1.963e-02, -1.276e-01, 1.388e-02) * s0_2_1;
	r1 += M4(-3.000e-02, -5.184e-02, -1.438e-01, -1.958e-01, -4.263e-02, -2.395e-02, 1.111e-01, 5.093e-02, -1.140e-01, -1.071e-02, 5.847e-02, 3.004e-01, 4.255e-02, 6.148e-02, -1.606e-02, 9.401e-02) * s0_2_1;
	r2 += M4(4.106e-02, 1.596e-01, -2.763e-01, -2.294e-01, 1.637e-03, -1.984e-02, 6.959e-02, 1.536e-02, -1.157e-02, 1.762e-01, 4.581e-02, -4.943e-02, 6.013e-03, -2.188e-01, -2.600e-02, 2.603e-02) * s0_2_1;
	r0 += M4(-5.404e-01, 1.587e-02, -1.878e-01, -1.070e-01, -2.888e-01, -1.278e-02, 3.284e-02, -2.708e-02, -3.302e-01, 8.436e-03, -1.704e-01, 3.326e-02, -1.000e+00, 3.784e-02, -4.633e-03, 9.644e-02) * s0_2_2;
	r1 += M4(-7.289e-02, -2.618e-02, -3.437e-01, -8.935e-02, -2.067e-02, -5.528e-03, 1.157e-01, 1.992e-03, -1.061e-02, -1.861e-02, 1.552e-02, 7.065e-02, 5.473e-02, -4.065e-03, 1.558e-01, 9.307e-02) * s0_2_2;
	r2 += M4(-1.051e-03, 4.834e-02, 6.804e-02, -3.320e-02, 2.540e-02, 1.248e-02, 8.706e-03, 5.184e-02, -6.576e-03, 4.942e-02, -2.008e-02, -4.584e-02, 3.803e-02, -8.914e-02, 8.237e-02, 5.102e-02) * s0_2_2;
	r0 += M4(9.185e-02, 7.873e-02, 1.703e-03, -3.788e-02, 3.333e-04, 1.912e-02, 1.412e-02, -3.259e-02, -3.924e-02, -8.911e-02, -2.589e-02, 9.067e-03, -1.083e-01, 1.031e-02, -5.231e-02, 2.139e-03) * s1_0_0;
	r1 += M4(-9.855e-03, 1.449e-02, 5.203e-03, 2.493e-02, 1.479e-03, 5.629e-02, 1.465e-02, 7.645e-03, -1.346e-02, -5.858e-02, -2.678e-02, -2.407e-02, -1.498e-02, 5.096e-03, 3.915e-02, -1.377e-02) * s1_0_0;
	r2 += M4(4.715e-02, -1.703e-02, 4.284e-02, -2.847e-02, -7.176e-02, -2.314e-02, -7.501e-02, 2.123e-02, -4.375e-02, 1.949e-02, -4.924e-02, -1.163e-02, 8.950e-03, 4.182e-02, -5.631e-02, -2.721e-02) * s1_0_0;
	r0 += M4(7.068e-02, 2.901e-01, 1.420e-01, -3.279e-02, -3.321e-02, -1.414e-01, 9.665e-02, -3.729e-04, -8.875e-02, 4.197e-02, -1.989e-01, -1.016e-02, 1.169e-01, 2.691e-01, -2.882e-02, -8.569e-02) * s1_0_1;
	r1 += M4(8.504e-02, 1.450e-01, 4.115e-04, -5.241e-03, 1.926e-01, 1.104e-01, -7.994e-03, -2.238e-02, -1.362e-01, -7.198e-02, 1.421e-03, 6.157e-02, 8.573e-02, -1.651e-02, 8.321e-02, 7.065e-02) * s1_0_1;
	r2 += M4(-9.351e-02, -4.095e-02, -8.286e-02, 4.417e-02, -7.494e-02, -6.568e-02, -4.600e-02, 6.133e-02, 5.122e-02, 1.519e-01, -1.916e-02, 3.283e-02, 3.939e-02, 6.340e-02, 2.673e-02, 1.789e-01) * s1_0_1;
	r0 += M4(-6.738e-02, 1.613e-01, 8.172e-02, -2.159e-02, -1.116e-01, -1.060e-01, -9.105e-02, -1.325e-02, -3.399e-02, 4.651e-02, 2.596e-02, 2.265e-03, -1.554e-03, -1.275e-01, -1.675e-01, 5.187e-04) * s1_0_2;
	r1 += M4(8.941e-02, 6.912e-03, -5.615e-02, -3.096e-03, 1.862e-03, 6.762e-02, 6.544e-03, -2.527e-02, 6.567e-02, 5.053e-02, -4.415e-02, -2.704e-02, -1.538e-01, -6.665e-02, -9.580e-03, -7.970e-03) * s1_0_2;
	r2 += M4(-6.897e-02, -1.259e-02, 3.533e-03, -2.712e-02, -2.967e-02, -4.501e-02, -2.056e-02, -2.280e-02, -3.202e-02, 6.214e-02, 1.707e-02, -7.605e-02, 1.164e-02, -1.960e-04, -1.919e-02, -1.016e-01) * s1_0_2;
	r0 += M4(6.318e-02, -4.483e-02, 5.432e-03, 2.078e-02, -5.958e-02, -2.464e-02, -2.040e-02, -5.326e-03, 1.774e-01, 8.729e-02, -7.610e-02, 1.590e-02, -2.552e-01, -1.121e-01, 5.041e-02, -8.286e-03) * s1_1_0;
	r1 += M4(-1.483e-02, -1.909e-02, 6.299e-03, -2.211e-02, -2.716e-02, -7.053e-02, -3.194e-02, -6.321e-02, -1.635e-02, 7.267e-02, -4.855e-02, -1.209e-01, 1.603e-02, -1.919e-01, -1.429e-02, -6.287e-02) * s1_1_0;
	r2 += M4(3.391e-02, -4.285e-02, 5.542e-02, -4.604e-03, -4.302e-01, -5.285e-02, 9.445e-02, -4.018e-02, 2.290e-01, 1.059e-01, -1.761e-01, -3.396e-02, 1.292e-01, 3.082e-02, -1.595e-01, -8.452e-02) * s1_1_0;
	r0 += M4(2.134e-02, -6.909e-02, 1.273e-01, 2.078e-01, -8.404e-02, 8.026e-02, -4.560e-01, -1.021e-01, -6.312e-02, 1.882e-01, 4.126e-01, 2.114e-01, 3.086e-01, -1.500e-01, 1.597e-01, 4.418e-01) * s1_1_1;
	r1 += M4(-2.345e-01, -3.594e-01, -9.511e-03, 9.302e-02, -5.207e-01, -8.771e-03, -3.367e-01, -2.607e-01, 4.093e-01, 3.073e-01, 2.261e-01, 2.278e-01, -2.627e-01, 1.606e-01, -6.395e-02, -9.765e-02) * s1_1_1;
	r2 += M4(-2.175e-01, -2.388e-01, -2.016e-01, -2.348e-01, -1.519e-01, -9.050e-02, 8.489e-02, -4.352e-01, 7.183e-02, 2.459e-01, 2.735e-01, 1.387e-01, -2.114e-03, 1.140e-01, 4.486e-01, 3.232e-01) * s1_1_1;
	r0 += M4(-1.390e-01, -6.031e-02, 1.096e-01, 3.136e-02, -2.727e-02, -9.778e-03, -1.857e-02, -2.369e-02, 9.746e-02, -3.856e-03, 2.103e-01, 6.188e-02, -4.133e-01, 1.129e-01, 2.303e-01, -3.109e-02) * s1_1_2;
	r1 += M4(-1.183e-01, 5.845e-02, 3.602e-02, -8.474e-02, 6.601e-02, -1.450e-01, 1.727e-01, 3.990e-02, -6.118e-02, -9.333e-02, -2.223e-01, -1.404e-02, 1.772e-01, -8.267e-03, 1.450e-01, -2.308e-03) * s1_1_2;
	r2 += M4(4.297e-02, -9.084e-02, -8.968e-02, 1.840e-01, -5.717e-02, -7.728e-02, -6.810e-02, -1.955e-01, 4.372e-02, 2.261e-01, -1.345e-02, -2.475e-01, -2.912e-02, 1.008e-01, -7.890e-02, 1.362e-02) * s1_1_2;
	r0 += M4(-4.929e-02, 1.415e-02, -2.994e-02, -3.028e-02, -2.574e-01, -9.644e-03, -1.098e-02, -2.517e-02, 5.483e-02, -4.668e-02, 3.200e-02, -4.040e-02, -4.469e-01, 8.985e-03, 6.581e-03, -3.919e-02) * s1_2_0;
	r1 += M4(-9.531e-03, -1.935e-02, -1.371e-02, -1.443e-02, 2.922e-02, 2.834e-02, 5.663e-02, 9.698e-02, -1.534e-03, -2.633e-02, -8.376e-03, -9.018e-02, -2.532e-02, 1.234e-02, 4.875e-02, 4.729e-02) * s1_2_0;
	r2 += M4(2.132e-02, -2.418e-02, 3.828e-02, -2.839e-02, 4.649e-02, -5.719e-02, -6.732e-03, -1.803e-02, -5.450e-02, 5.659e-02, -1.119e-01, 5.483e-02, -1.288e-02, -1.188e-02, 1.920e-02, 6.316e-02) * s1_2_0;
	r0 += M4(2.463e-01, 3.794e-02, -8.369e-03, -6.269e-03, -8.547e-01, -5.002e-02, 2.599e-02, 5.835e-02, -3.247e-01, -3.474e-02, 2.485e-02, -2.234e-02, 6.895e-01, 4.745e-02, -1.577e-01, -2.352e-01) * s1_2_1;
	r1 += M4(1.684e-02, 1.074e-01, -2.131e-02, 7.732e-02, 7.528e-02, -5.940e-02, -2.274e-01, -2.520e-02, -1.151e-01, -1.208e-01, 1.047e-01, -8.231e-02, 9.350e-02, -2.281e-02, -1.488e-01, 6.844e-02) * s1_2_1;
	r2 += M4(-6.557e-02, -6.144e-02, -2.529e-01, -1.213e-01, -7.212e-02, -3.436e-02, 1.042e-01, 3.576e-02, -2.413e-02, 1.812e-01, 1.748e-01, 6.941e-03, 8.875e-02, -5.603e-03, 2.627e-01, 5.829e-02) * s1_2_1;
	r0 += M4(-4.971e-01, -1.275e-02, -3.949e-04, 2.594e-02, 2.836e-01, -5.830e-02, 2.091e-02, -9.302e-02, 2.878e-01, 3.876e-04, -1.528e-01, -1.082e-01, -5.045e-01, -2.693e-02, -1.888e-01, 7.254e-03) * s1_2_2;
	r1 += M4(4.983e-02, 1.807e-03, -6.161e-02, 3.479e-02, -6.906e-02, 4.569e-02, 9.350e-02, -1.023e-01, -4.716e-03, 4.962e-03, -1.705e-01, -8.281e-02, -6.137e-02, 2.917e-02, 2.588e-01, -2.723e-02) * s1_2_2;
	r2 += M4(5.262e-03, -4.841e-02, 4.575e-02, 1.892e-02, -1.793e-02, -2.643e-02, -1.304e-01, 1.917e-02, -1.959e-02, 7.019e-02, -2.182e-02, -6.576e-02, -1.802e-02, 7.348e-02, -5.188e-02, 6.172e-02) * s1_2_2;
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	r0 += M4(5.793e-02, 2.786e-02, -1.941e-02, 4.921e-02, -5.281e-02, -8.963e-03, -4.296e-02, -6.465e-02, -2.624e-02, -1.035e-02, 1.909e-02, -7.010e-03, -3.860e-02, -9.421e-03, 2.584e-02, -8.808e-03) * s0_0_0;
	r1 += M4(3.071e-02, 7.559e-02, -2.057e-02, 2.020e-02, -9.886e-02, -2.669e-01, -9.890e-03, -7.298e-02, -1.837e-02, -7.078e-02, -1.288e-02, -2.382e-02, 1.241e-02, -4.527e-03, -2.431e-02, 3.064e-02) * s0_0_0;
	r2 += M4(3.255e-02, 2.057e-02, 1.768e-02, 5.337e-02, -4.459e-01, 1.070e-03, 3.949e-03, -2.289e-02, -3.157e-03, 2.472e-02, -8.143e-02, -9.849e-03, -1.342e-02, -2.765e-02, 1.967e-02, 6.545e-03) * s0_0_0;
	r0 += M4(-1.070e-01, -1.623e-02, 2.046e-01, 4.787e-02, 1.485e-01, -1.116e-01, -1.765e-01, -8.065e-02, 1.091e-01, 7.021e-02, -1.416e-01, -6.582e-02, 8.253e-02, -8.306e-02, -9.999e-02, -8.062e-03) * s0_0_1;
	r1 += M4(-2.542e-02, -1.237e-01, -4.307e-02, -8.667e-02, -2.720e-01, -1.690e-01, -9.643e-02, -1.635e-01, 4.372e-02, 1.326e-01, 5.508e-02, 5.028e-02, -3.970e-02, -9.403e-02, 7.444e-03, 4.181e-02) * s0_0_1;
	r2 += M4(8.132e-02, 2.210e-02, 4.352e-03, 1.137e-01, 1.343e-01, -2.736e-02, -7.808e-02, -8.118e-02, -2.111e-03, 1.694e-01, 6.630e-02, 1.264e-01, 3.721e-02, 6.070e-02, -1.525e-02, -9.103e-02) * s0_0_1;
	r0 += M4(4.202e-02, -5.274e-02, 3.069e-04, 2.920e-02, -3.191e-02, -1.340e-02, 2.576e-02, -8.074e-03, 1.378e-01, 2.026e-02, 8.083e-03, -2.488e-03, 4.771e-02, -5.814e-02, -1.816e-01, 1.382e-02) * s0_0_2;
	r1 += M4(-7.114e-03, 3.677e-02, 1.108e-01, -2.387e-02, -7.152e-02, -2.163e-02, 3.184e-02, 1.824e-02, 3.145e-02, -2.446e-03, -1.430e-02, -2.705e-03, -5.471e-02, 2.252e-02, -6.374e-02, 6.703e-02) * s0_0_2;
	r2 += M4(-4.363e-02, 3.854e-02, -1.827e-02, 1.397e-01, -7.198e-03, 4.241e-02, 3.381e-02, 1.438e-01, -2.909e-02, 4.757e-02, -1.115e-02, -7.466e-02, -1.924e-02, 2.005e-01, -1.663e-02, -2.587e-01) * s0_0_2;
	r0 += M4(-1.241e-01, 2.302e-02, 2.010e-02, -7.543e-02, 8.910e-02, -5.738e-02, -8.270e-02, -1.105e-01, 2.779e-01, 6.051e-02, 4.681e-02, 8.601e-02, -1.380e-01, 7.734e-03, 2.118e-02, 2.923e-03) * s0_1_0;
	r1 += M4(3.625e-02, 5.886e-02, 1.182e-01, -2.001e-02, 1.377e-02, -1.392e-01, -5.313e-02, -7.006e-02, 8.778e-03, 1.059e-01, -9.350e-02, -1.031e-02, -7.100e-03, 4.839e-03, 5.133e-02, 6.034e-02) * s0_1_0;
	r2 += M4(-1.960e-02, 4.496e-02, -8.376e-02, -3.912e-02, -2.338e-01, 3.203e-02, 6.106e-02, 4.649e-02, -1.189e-01, 1.585e-01, -3.378e-02, 5.320e-02, 4.430e-02, -6.179e-02, 6.592e-02, -5.500e-02) * s0_1_0;
	r0 += M4(1.944e-01, 2.338e-01, -6.869e-02, -2.646e-02, -5.461e-01, -8.131e-03, -3.857e-01, -1.850e-01, 1.918e-01, -9.204e-02, 2.502e-01, 2.495e-01, 9.988e-02, -5.018e-02, -1.188e-01, 2.336e-02) * s0_1_1;
	r1 += M4(3.988e-01, 4.676e-01, -2.575e-01, 2.026e-01, -4.364e-01, -2.084e-01, -4.219e-02, -6.765e-02, 6.714e-02, -1.717e-01, 2.503e-01, 1.255e-01, -3.975e-02, -5.630e-02, -1.824e-02, 1.351e-01) * s0_1_1;
	r2 += M4(1.194e-01, 3.748e-02, 4.138e-01, 4.861e-01, -2.645e-03, -4.188e-02, -3.896e-01, -1.023e-01, -1.449e-01, 1.897e-01, -1.138e-01, 1.615e-01, -5.127e-02, -3.278e-01, 3.377e-02, -6.901e-02) * s0_1_1;
	r0 += M4(2.596e-01, 7.380e-02, 1.879e-02, 7.640e-02, -6.183e-01, -1.187e-01, -1.100e-01, -1.688e-02, -1.237e-01, -2.506e-02, -1.088e-02, -2.494e-02, -3.579e-01, -4.396e-02, -9.723e-02, 2.628e-02) * s0_1_2;
	r1 += M4(1.076e-01, -5.207e-02, 3.096e-01, 8.768e-02, -3.952e-02, -7.472e-03, 1.337e-01, -2.164e-02, -1.062e-01, -8.434e-03, -1.611e-01, -4.507e-02, -6.421e-02, -4.117e-02, -1.232e-01, 2.043e-02) * s0_1_2;
	r2 += M4(-3.328e-02, 1.277e-01, -5.598e-02, -9.204e-02, -1.537e-02, 1.112e-01, 7.951e-02, 1.966e-01, 1.770e-02, 8.691e-04, 3.556e-03, 5.129e-02, -2.218e-02, -5.204e-03, -4.816e-02, -1.111e-01) * s0_1_2;
	r0 += M4(-1.281e-01, 9.047e-03, 2.023e-02, 1.130e-01, -1.278e-01, 2.550e-02, 4.455e-02, 6.418e-02, 1.748e-01, -2.717e-02, -1.517e-02, -6.811e-02, 8.266e-02, 3.924e-03, -2.769e-02, -3.022e-03) * s0_2_0;
	r1 += M4(1.079e-02, -3.193e-03, -2.679e-02, 7.500e-03, -6.409e-03, 3.969e-03, -2.226e-02, 1.291e-02, 1.221e-02, -1.226e-02, 3.007e-04, -5.995e-02, -1.825e-02, -2.235e-02, -1.619e-02, -6.192e-03) * s0_2_0;
	r2 += M4(-2.420e-02, 4.065e-02, -1.188e-01, 5.614e-02, -5.483e-02, 7.370e-02, -2.615e-02, -2.093e-02, 5.041e-02, 5.942e-02, 8.033e-02, 7.664e-04, 3.448e-02, 9.694e-05, -5.401e-03, 1.025e-02) * s0_2_0;
	r0 += M4(7.453e-01, -2.761e-02, 7.558e-02, 1.827e-01, -2.039e-01, 2.846e-02, 1.485e-01, 7.573e-02, -5.631e-01, -3.786e-03, -6.182e-02, -1.495e-01, -2.023e-01, 7.576e-04, -7.597e-03, 1.429e-02) * s0_2_1;
	r1 += M4(-7.217e-02, -1.002e-01, 1.148e-01, -7.740e-03, 9.931e-02, 7.545e-02, -1.130e-01, 5.346e-02, -1.058e-02, 2.685e-02, 4.805e-02, -7.027e-02, 3.466e-03, 4.225e-02, 1.929e-02, 1.237e-01) * s0_2_1;
	r2 += M4(1.616e-02, 7.956e-02, 1.645e-01, 1.213e-01, -1.772e-02, 1.313e-01, -1.588e-01, -2.800e-03, -6.669e-02, 5.519e-03, -1.386e-01, -5.193e-02, -1.113e-02, -8.528e-02, -1.451e-02, -7.085e-03) * s0_2_1;
	r0 += M4(1.623e-01, -3.229e-02, -5.804e-02, -2.671e-02, -1.000e+00, -9.439e-04, 3.222e-02, -3.303e-02, -5.029e-01, 5.575e-03, -2.335e-02, -3.434e-02, -2.477e-01, 1.449e-02, 2.357e-02, -3.183e-02) * s0_2_2;
	r1 += M4(-5.298e-02, -6.673e-03, 1.501e-02, -5.324e-02, -4.905e-03, -9.225e-04, -1.008e-02, -1.270e-02, 1.103e-02, -1.079e-02, -8.626e-02, -4.128e-02, 4.016e-02, 1.471e-02, -6.018e-02, 6.587e-03) * s0_2_2;
	r2 += M4(-9.100e-03, 3.057e-02, -4.996e-02, -7.537e-02, 1.568e-02, 5.162e-02, 9.676e-03, 2.940e-02, -4.185e-03, -1.847e-02, -1.765e-02, 1.886e-02, 1.769e-03, -1.740e-02, -2.001e-02, -4.691e-02) * s0_2_2;
	r0 += V4(-2.040e-02, -1.480e-02, -1.592e-02, -1.825e-02);
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(-1.581e-02, -1.523e-02, -1.309e-02, -1.260e-02);
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(-1.799e-02, -9.665e-03, -1.202e-02, -1.983e-02);
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_4x12_DS] -out-shuffle
//!HOOK LUMA
//!COMPUTE 16 16 8 8
//!BIND conv4
//!BIND LUMA
//!WIDTH LUMA.w 2 *
//!HEIGHT LUMA.h 2 *
//!COMPONENTS 1
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// Output pass: a 3x3 convolution over the 12 channels read from conv4 yields
// four values per LUMA pixel. These are written as a 2x2 block of the
// double-size output, each added to a LUMA sample taken at that output pixel.
//
// l0/l1/l2 fetch texel 0/1/2 of the three side-by-side conv4 texels belonging
// to the LUMA pixel at pos + (x, y). They rely on the locals `pos` and `sz`
// of hook(); the pixel coordinate is clamped to [0, sz], so taps outside the
// image repeat the nearest edge pixel.
#define l0(x, y) V4((conv4_mul * texelFetch(conv4_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((conv4_mul * texelFetch(conv4_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((conv4_mul * texelFetch(conv4_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Workgroup tile of the input: G[plane][row][col], where the 10x10 extent is
// the 8x8 pixels of the workgroup plus a one-pixel border on every side.
shared V4 G[3][10][10];
void hook() {
	// Position of this invocation inside its 8x8 workgroup.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	// LUMA-space pixel processed by this invocation.
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	// Top-left pixel of the 2x2 output block produced for that LUMA pixel.
	ivec2 opos = pos * ivec2(2, 2);
	// Largest valid LUMA pixel coordinate, used by l0/l1/l2 for clamping.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Cooperative fill of the shared tile. Each axis tries offsets 0 and 8;
	// an index of 10 or more is skipped, so only invocations with xy < 2 on an
	// axis load the extra border rows/columns. Tile entry [ay][ax] holds the
	// pixel at (workgroup origin) + (ax - 1, ay - 1).
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	// Every invocation of the workgroup must have written its tile entries
	// before any of them reads G below.
	barrier();
	// sP_R_C is the tap of plane P at row R, column C of the 3x3 window around
	// this pixel (s*_1_1 is the pixel itself). Only two sets of names are
	// declared; the s0_* set is reloaded with plane 2 further down.
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	// Accumulator: x, y, z, w become the four pixels of the 2x2 output block.
	V4 r0;
	r0 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// Input plane 0: one 4x4 weight matrix per tap.
	r0 += M4(2.388e-02, -2.179e-02, -1.495e-02, -1.356e-02, 5.239e-03, -2.309e-03, 7.517e-04, 9.522e-04, -2.581e-02, -5.443e-03, 8.823e-03, -3.491e-03, 2.703e-02, -2.640e-03, -9.476e-03, 5.470e-03) * s0_0_0;
	r0 += M4(-4.142e-02, 1.382e-01, -8.695e-03, 4.808e-02, 1.351e-03, 4.193e-03, -7.097e-03, -3.681e-03, -1.436e-02, -1.713e-02, 1.211e-02, 2.090e-02, 1.362e-01, 9.544e-02, 2.338e-02, -3.694e-03) * s0_0_1;
	r0 += M4(-2.989e-03, -2.343e-02, -4.827e-04, -1.972e-02, -9.735e-03, -5.032e-03, -3.987e-03, -6.905e-03, 8.789e-03, 4.511e-03, 7.229e-03, 8.087e-03, -1.911e-02, 3.528e-02, -1.214e-02, 2.181e-03) * s0_0_2;
	r0 += M4(1.270e-02, -1.064e-03, 6.812e-02, -1.000e-02, 1.484e-02, 1.731e-02, 6.290e-03, 3.235e-03, 6.860e-02, 9.856e-03, -1.395e-01, 3.042e-02, 2.633e-02, -2.918e-03, 7.205e-02, -9.850e-03) * s0_1_0;
	r0 += M4(-1.438e-02, -5.431e-04, -3.300e-02, 7.007e-02, -3.142e-01, -2.705e-01, -2.310e-02, -9.285e-03, 1.468e-01, 2.173e-01, -5.744e-02, -3.330e-01, 1.171e-01, 8.227e-02, -5.113e-01, -1.525e-01) * s0_1_1;
	r0 += M4(1.387e-03, -2.067e-03, -2.032e-03, -2.885e-03, 1.220e-02, -6.065e-02, -6.519e-03, -5.779e-03, 5.931e-03, 4.504e-02, -1.373e-03, 4.382e-02, -2.055e-02, -1.359e-02, 2.163e-02, -1.851e-01) * s0_1_2;
	r0 += M4(-2.418e-03, -2.180e-03, -2.329e-03, 2.661e-03, -7.243e-03, -7.124e-03, 7.299e-02, 6.392e-03, 6.362e-03, 3.663e-03, 4.165e-02, 3.493e-03, -6.592e-03, 1.473e-03, 3.676e-03, 1.119e-03) * s0_2_0;
	r0 += M4(-1.542e-03, -3.847e-04, -3.249e-04, -1.279e-03, 8.499e-02, 5.020e-02, 9.531e-02, 1.557e-01, -8.286e-04, 6.556e-03, 5.992e-02, 8.960e-02, -1.044e-02, -1.032e-02, 4.056e-02, 2.354e-02) * s0_2_1;
	r0 += M4(-7.458e-04, 1.362e-04, 7.283e-05, 1.277e-03, -9.639e-03, -1.936e-03, 3.132e-02, 6.958e-02, 1.119e-03, -3.076e-03, 9.122e-04, 2.042e-02, -6.981e-03, -1.187e-02, -6.684e-03, -7.752e-03) * s0_2_2;
	// Input plane 1.
	r0 += M4(2.814e-03, 3.148e-03, -1.132e-03, -6.398e-03, 1.439e-02, 2.829e-03, -2.626e-04, 5.178e-03, 8.911e-02, 6.704e-03, 3.282e-03, -7.610e-03, 4.977e-04, 4.986e-04, 2.492e-03, -4.189e-03) * s1_0_0;
	r0 += M4(-2.823e-02, 3.015e-02, 2.882e-03, -1.051e-02, 4.334e-02, -8.423e-03, -4.752e-03, 7.005e-03, 2.837e-02, -2.014e-01, -1.233e-02, 9.915e-03, 1.724e-01, 1.304e-01, 5.953e-03, 1.205e-02) * s1_0_1;
	r0 += M4(-2.935e-03, 1.517e-03, -6.146e-03, -4.596e-03, 5.094e-03, -1.761e-02, -4.354e-03, -5.804e-03, -3.110e-03, 1.074e-02, -3.243e-03, -4.863e-03, 3.929e-03, 5.750e-02, 7.298e-03, 1.157e-02) * s1_0_2;
	r0 += M4(6.277e-02, 1.277e-02, 7.593e-02, 9.044e-03, -2.377e-02, -2.143e-02, 1.635e-02, -1.530e-03, 7.029e-02, 4.014e-03, 1.431e-01, 1.862e-02, -5.418e-02, 1.065e-02, -2.371e-02, 3.166e-02) * s1_1_0;
	r0 += M4(1.483e-01, -5.953e-01, 1.826e-02, 3.266e-01, -5.438e-01, 2.192e-01, 1.694e-01, 1.648e-02, 4.286e-02, -1.889e-01, 7.104e-02, -4.457e-01, -1.159e-01, -1.840e-01, 2.338e-01, -3.845e-02) * s1_1_1;
	r0 += M4(-3.541e-03, 4.060e-05, 1.047e-02, -1.971e-02, -3.058e-02, 6.355e-02, -1.071e-03, 5.601e-03, -2.668e-03, 2.787e-02, -7.114e-04, 3.528e-02, 7.299e-03, 7.689e-02, -5.682e-03, 1.274e-01) * s1_1_2;
	r0 += M4(-1.342e-02, -7.935e-04, -3.368e-02, -5.183e-03, -3.825e-03, -2.990e-03, -5.412e-02, -5.920e-03, 5.003e-05, -5.121e-04, 7.959e-03, -3.997e-03, 5.189e-03, -1.387e-03, -1.053e-02, 1.819e-03) * s1_2_0;
	r0 += M4(-2.544e-02, 3.770e-02, -5.524e-03, 9.469e-02, -2.353e-02, -4.780e-02, 1.780e-01, -6.772e-02, 1.365e-03, -1.446e-05, 1.205e-02, 4.146e-02, 1.905e-02, 6.786e-03, -7.593e-02, -5.630e-02) * s1_2_1;
	r0 += M4(4.612e-03, 1.180e-02, -6.454e-03, -5.981e-03, 1.082e-03, -3.748e-02, -2.229e-02, -3.288e-02, -3.184e-03, 7.064e-04, -5.054e-03, 1.021e-02, 3.146e-04, 1.630e-02, 8.987e-03, 1.049e-02) * s1_2_2;
	// Input plane 2: the s0_* names are reloaded from G[2] and reused.
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	r0 += M4(-2.370e-04, -8.572e-04, -2.001e-03, 7.761e-04, 2.123e-02, 1.536e-02, 1.274e-02, -7.336e-03, 2.614e-03, -1.381e-03, -4.164e-03, -4.223e-03, 1.770e-02, 4.606e-04, 3.006e-03, 2.284e-03) * s0_0_0;
	r0 += M4(-3.455e-02, -6.941e-03, -1.279e-02, -4.495e-03, -6.119e-03, -2.816e-03, 4.406e-02, 4.285e-02, -1.856e-01, -9.240e-03, 2.836e-02, -2.436e-02, 3.072e-02, 3.407e-02, 1.014e-02, -6.580e-03) * s0_0_1;
	r0 += M4(-2.424e-02, -8.274e-03, 1.670e-02, -1.367e-03, 1.744e-02, 1.847e-02, -4.058e-03, 1.788e-02, 4.189e-04, 7.005e-02, -6.018e-04, -8.652e-03, 6.124e-04, -8.224e-03, 1.516e-04, 2.652e-03) * s0_0_2;
	r0 += M4(-5.683e-03, -9.489e-03, 3.950e-04, -6.793e-03, 8.911e-03, 5.503e-02, 1.017e-02, 5.414e-02, 7.029e-03, 2.001e-03, 3.845e-03, -5.365e-03, 8.470e-02, -3.136e-03, 6.568e-02, -2.159e-04) * s0_1_0;
	r0 += M4(-1.289e-01, 2.727e-02, -6.540e-02, -2.783e-03, -1.609e-01, -1.479e-01, -1.606e-01, -1.509e-01, -1.125e-01, 2.898e-02, -4.326e-01, 6.509e-02, -2.135e-01, 2.303e-01, -9.698e-02, 1.964e-01) * s0_1_1;
	r0 += M4(2.749e-01, -1.897e-01, 6.079e-02, -6.428e-02, 3.901e-02, -1.324e-02, 3.964e-02, -1.455e-02, 2.624e-03, 1.321e-01, 2.834e-03, 1.880e-01, 1.176e-02, -6.067e-02, 6.097e-03, -4.895e-02) * s0_1_2;
	r0 += M4(-2.229e-03, -4.046e-04, -4.468e-03, -4.846e-03, 8.917e-03, -1.339e-02, 1.825e-02, 1.288e-02, 6.357e-03, -1.097e-03, 8.546e-03, 1.583e-03, 8.010e-03, 4.861e-03, 4.602e-02, 3.677e-03) * s0_2_0;
	r0 += M4(-1.234e-02, -1.343e-02, -1.067e-01, -3.090e-03, 4.602e-02, 4.333e-02, -3.324e-03, -3.199e-03, -7.482e-03, -2.385e-03, 5.757e-02, -9.232e-03, 3.198e-02, 1.649e-02, -6.421e-02, 9.351e-02) * s0_2_1;
	r0 += M4(2.706e-02, 6.566e-02, 1.387e-01, -3.276e-02, -5.716e-03, 1.415e-02, 1.209e-02, 1.568e-02, -2.524e-03, -5.214e-03, -1.433e-03, 1.958e-02, 2.355e-03, -1.244e-02, 1.034e-02, -3.471e-02) * s0_2_2;
	// Bias. This pass applies no clamp to r0 before it is combined with LUMA.
	r0 += V4(-2.068e-10, -4.688e-10, -2.061e-10, -2.685e-10);
	// opt is half of LUMA_pt per axis, i.e. the step between neighbouring
	// output pixels if LUMA_pt is the normalized size of one LUMA texel
	// (presumably; it is supplied by mpv). fpos is then the centre of the
	// top-left output pixel in normalized coordinates.
	vec2 opt = 0.5 * LUMA_pt;
	vec2 fpos = (vec2(opos) + vec2(0.5)) * opt;
	// Pixel shuffle: x -> (0,0), y -> (1,0), z -> (0,1), w -> (1,1). Each value
	// is added to LUMA sampled at that output pixel's centre.
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0.x + LUMA_tex(fpos + vec2(0.0, 0.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r0.y + LUMA_tex(fpos + vec2(1.0, 0.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(0, 1), vec4(r0.z + LUMA_tex(fpos + vec2(0.0, 1.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(1, 1), vec4(r0.w + LUMA_tex(fpos + vec2(1.0, 1.0) * opt).r, 0.0, 0.0, 1.0));
}
